On Thu, 12 Jul 2001, Bruce Momjian wrote: > Your patch has been added to the PostgreSQL unapplied patches list at: Here is a new version of that patch, with improvements from some feedback I got from Barry Lind. More of the encoding-related logic is moved into the Encoding class and I've added some clarifying comments. /Anders > > With this patch I've done an attempt to make the handling of character > > encoding in the JDBC driver a little clearer. > > > > * Cleans up the logic to select a JVM encoding for a backend encoding. > > * Makes the connection setup code easier to read. > > * Gathers character encoding and decoding in a single place. > > * Adds unit tests for encoding. > > * Introduces a new class, org.postgresql.core.Encoding, and the > > corresponding unit test class, org.postgresql.test.jdbc2.EncodingTest. _____________________________________________________________________ A n d e r s B e n g t s s o n [EMAIL PROTECTED] Stockholm, Sweden
*** ./src/interfaces/jdbc/org/postgresql/Connection.java.orig Tue Jul 10 22:09:49 2001 --- ./src/interfaces/jdbc/org/postgresql/Connection.java Wed Jul 11 23:02:58 2001 *************** *** 8,13 **** --- 8,14 ---- import org.postgresql.fastpath.*; import org.postgresql.largeobject.*; import org.postgresql.util.*; + import org.postgresql.core.Encoding; /** * $Id: Connection.java,v 1.17 2001/06/07 00:09:32 momjian Exp $ *************** *** 33,43 **** /** * The encoding to use for this connection. - * If <b>null</b>, the encoding has not been specified by the - * user, and the default encoding for the platform should be - * used. */ ! private String encoding; public boolean CONNECTION_OK = true; public boolean CONNECTION_BAD = false; --- 34,41 ---- /** * The encoding to use for this connection. */ ! private Encoding encoding = Encoding.defaultEncoding(); public boolean CONNECTION_OK = true; public boolean CONNECTION_BAD = false; *************** *** 168,174 **** // "User authentication failed" // throw new SQLException(pg_stream.ReceiveString ! (receive_sbuf, 4096, getEncoding())); case 'R': // Get the type of request --- 166,172 ---- // "User authentication failed" // throw new SQLException(pg_stream.ReceiveString ! (receive_sbuf, 4096, encoding)); case 'R': // Get the type of request *************** *** 239,245 **** case 'E': case 'N': throw new SQLException(pg_stream.ReceiveString ! (receive_sbuf, 4096, getEncoding())); default: throw new PSQLException("postgresql.con.setup"); } --- 237,243 ---- case 'E': case 'N': throw new SQLException(pg_stream.ReceiveString ! (receive_sbuf, 4096, encoding)); default: throw new PSQLException("postgresql.con.setup"); } *************** *** 251,361 **** break; case 'E': case 'N': ! throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, getEncoding())); default: throw new PSQLException("postgresql.con.setup"); } - // Originally we issued a SHOW DATESTYLE statement to find the databases default - // datestyle. 
However, this caused some problems with timestamps, so in 6.5, we - // went the way of ODBC, and set the connection to ISO. - // - // This may cause some clients to break when they assume anything other than ISO, - // but then - they should be using the proper methods ;-) - // - // We also ask the DB for certain properties (i.e. DatabaseEncoding at this time) - // firstWarning = null; ! java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " + ! "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end"); ! String dbEncoding = null; ! //retrieve DB properties ! if(initrset.next()) { ! ! //handle DatabaseEncoding ! dbEncoding = initrset.getString(1); ! //convert from the PostgreSQL name to the Java name ! if (dbEncoding.equals("SQL_ASCII")) { ! dbEncoding = "ASCII"; ! } else if (dbEncoding.equals("UNICODE")) { ! dbEncoding = "UTF8"; ! } else if (dbEncoding.equals("LATIN1")) { ! dbEncoding = "ISO8859_1"; ! } else if (dbEncoding.equals("LATIN2")) { ! dbEncoding = "ISO8859_2"; ! } else if (dbEncoding.equals("LATIN3")) { ! dbEncoding = "ISO8859_3"; ! } else if (dbEncoding.equals("LATIN4")) { ! dbEncoding = "ISO8859_4"; ! } else if (dbEncoding.equals("LATIN5")) { ! dbEncoding = "ISO8859_5"; ! } else if (dbEncoding.equals("LATIN6")) { ! dbEncoding = "ISO8859_6"; ! } else if (dbEncoding.equals("LATIN7")) { ! dbEncoding = "ISO8859_7"; ! } else if (dbEncoding.equals("LATIN8")) { ! dbEncoding = "ISO8859_8"; ! } else if (dbEncoding.equals("LATIN9")) { ! dbEncoding = "ISO8859_9"; ! } else if (dbEncoding.equals("EUC_JP")) { ! dbEncoding = "EUC_JP"; ! } else if (dbEncoding.equals("EUC_CN")) { ! dbEncoding = "EUC_CN"; ! } else if (dbEncoding.equals("EUC_KR")) { ! dbEncoding = "EUC_KR"; ! } else if (dbEncoding.equals("EUC_TW")) { ! dbEncoding = "EUC_TW"; ! } else if (dbEncoding.equals("KOI8")) { ! // try first if KOI8_U is present, it's a superset of KOI8_R ! try { ! dbEncoding = "KOI8_U"; ! "test".getBytes(dbEncoding); ! } ! 
catch(UnsupportedEncodingException uee) { ! // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :( ! dbEncoding = "KOI8_R"; ! } ! } else if (dbEncoding.equals("WIN")) { ! dbEncoding = "Cp1252"; ! } else if (dbEncoding.equals("UNKNOWN")) { ! //This isn't a multibyte database so we don't have an encoding to use ! //We leave dbEncoding null which will cause the default encoding for the ! //JVM to be used ! dbEncoding = null; ! } else { ! dbEncoding = null; ! } ! } ! //Set the encoding for this connection ! //Since the encoding could be specified or obtained from the DB we use the ! //following order: ! // 1. passed as a property ! // 2. value from DB if supported by current JVM ! // 3. default for JVM (leave encoding null) ! String passedEncoding = info.getProperty("charSet"); // could be null ! ! if (passedEncoding != null) { ! encoding = passedEncoding; ! } else { ! if (dbEncoding != null) { ! //test DB encoding ! try { ! "TEST".getBytes(dbEncoding); ! //no error the encoding is supported by the current JVM ! encoding = dbEncoding; ! } catch (UnsupportedEncodingException uee) { ! //dbEncoding is not supported by the current JVM ! encoding = null; ! } ! } else { ! encoding = null; ! } } // Initialise object handling initObjectTypes(); --- 249,282 ---- break; case 'E': case 'N': ! throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, encoding)); default: throw new PSQLException("postgresql.con.setup"); } firstWarning = null; ! String dbEncoding; ! // "pg_encoding_to_char(1)" will return 'EUC_JP' for a backend compiled with multibyte, ! // otherwise it's hardcoded to 'SQL_ASCII'. ! // If the backend doesn't know about multibyte we can't assume anything about the encoding ! // used, so we denote this with 'UNKNOWN'. ! final String encodingQuery = ! 
"select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else getdatabaseencoding() end"; + // Set datestyle and fetch db encoding in a single call, to avoid making + // more than one round trip to the backend during connection startup. ! java.sql.ResultSet resultSet = ! ExecSQL("set datestyle to 'ISO'; " + encodingQuery); ! ! if (! resultSet.next()) { ! throw new PSQLException("postgresql.con.failed", "failed getting backend encoding"); } + dbEncoding = resultSet.getString(1); + encoding = Encoding.getEncoding(dbEncoding, info.getProperty("charSet")); // Initialise object handling initObjectTypes(); *************** *** 455,476 **** int insert_oid = 0; SQLException final_error = null; ! // Commented out as the backend can now handle queries ! // larger than 8K. Peter June 6 2000 ! //if (sql.length() > 8192) ! //throw new PSQLException("postgresql.con.toolong",sql); ! ! if (getEncoding() == null) ! buf = sql.getBytes(); ! else { ! try { ! buf = sql.getBytes(getEncoding()); ! } catch (UnsupportedEncodingException unse) { ! throw new PSQLException("postgresql.con.encoding", ! unse); ! } ! } ! try { pg_stream.SendChar('Q'); --- 376,382 ---- int insert_oid = 0; SQLException final_error = null; ! buf = encoding.encode(sql); try { pg_stream.SendChar('Q'); *************** *** 491,497 **** { case 'A': // Asynchronous Notify pid = pg_stream.ReceiveInteger(4); ! msg = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); break; case 'B': // Binary Data Transfer if (fields == null) --- 397,403 ---- { case 'A': // Asynchronous Notify pid = pg_stream.ReceiveInteger(4); ! msg = pg_stream.ReceiveString(receive_sbuf,8192,encoding); break; case 'B': // Binary Data Transfer if (fields == null) *************** *** 502,508 **** tuples.addElement(tup); break; case 'C': // Command Status ! recv_status = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); // Now handle the update count correctly. 
if(recv_status.startsWith("INSERT") || recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || recv_status.startsWith("MOVE")) { --- 408,414 ---- tuples.addElement(tup); break; case 'C': // Command Status ! recv_status = pg_stream.ReceiveString(receive_sbuf,8192,encoding); // Now handle the update count correctly. if(recv_status.startsWith("INSERT") || recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || recv_status.startsWith("MOVE")) { *************** *** 544,550 **** tuples.addElement(tup); break; case 'E': // Error Message ! msg = pg_stream.ReceiveString(receive_sbuf,4096,getEncoding()); final_error = new SQLException(msg); hfr = true; break; --- 450,456 ---- tuples.addElement(tup); break; case 'E': // Error Message ! msg = pg_stream.ReceiveString(receive_sbuf,4096,encoding); final_error = new SQLException(msg); hfr = true; break; *************** *** 559,568 **** hfr = true; break; case 'N': // Error Notification ! addWarning(pg_stream.ReceiveString(receive_sbuf,4096,getEncoding())); break; case 'P': // Portal Name ! String pname = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); break; case 'T': // MetaData Field Description if (fields != null) --- 465,474 ---- hfr = true; break; case 'N': // Error Notification ! addWarning(pg_stream.ReceiveString(receive_sbuf,4096,encoding)); break; case 'P': // Portal Name ! String pname = pg_stream.ReceiveString(receive_sbuf,8192,encoding); break; case 'T': // MetaData Field Description if (fields != null) *************** *** 595,601 **** for (i = 0 ; i < nf ; ++i) { ! String typname = pg_stream.ReceiveString(receive_sbuf,8192,getEncoding()); int typid = pg_stream.ReceiveIntegerR(4); int typlen = pg_stream.ReceiveIntegerR(2); int typmod = pg_stream.ReceiveIntegerR(4); --- 501,507 ---- for (i = 0 ; i < nf ; ++i) { ! 
String typname = pg_stream.ReceiveString(receive_sbuf,8192,encoding); int typid = pg_stream.ReceiveIntegerR(4); int typlen = pg_stream.ReceiveIntegerR(2); int typmod = pg_stream.ReceiveIntegerR(4); *************** *** 665,671 **** * default encoding. */ public String getEncoding() throws SQLException { ! return encoding; } /** --- 571,577 ---- * default encoding. */ public String getEncoding() throws SQLException { ! return encoding.name(); } /** *** ./src/interfaces/jdbc/org/postgresql/PG_Stream.java.orig Tue Jul 10 22:09:49 2001 --- ./src/interfaces/jdbc/org/postgresql/PG_Stream.java Tue Jul 10 22:10:06 2001 *************** *** 10,16 **** import org.postgresql.util.*; /** ! * @version 1.0 15-APR-1997 * * This class is used by Connection & PGlobj for communicating with the * backend. --- 10,16 ---- import org.postgresql.util.*; /** ! * $Id$ * * This class is used by Connection & PGlobj for communicating with the * backend. *************** *** 211,217 **** public String ReceiveString(int maxsiz) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); ! return ReceiveString(rst, maxsiz, null); } /** --- 211,217 ---- public String ReceiveString(int maxsiz) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); ! return ReceiveString(rst, maxsiz, Encoding.defaultEncoding()); } /** *************** *** 225,231 **** * @return string from back end * @exception SQLException if an I/O error occurs */ ! public String ReceiveString(int maxsiz, String encoding) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); return ReceiveString(rst, maxsiz, encoding); --- 225,231 ---- * @return string from back end * @exception SQLException if an I/O error occurs */ ! public String ReceiveString(int maxsiz, Encoding encoding) throws SQLException { byte[] rst = bytePoolDim1.allocByte(maxsiz); return ReceiveString(rst, maxsiz, encoding); *************** *** 243,251 **** * @return string from back end * @exception SQLException if an I/O error occurs */ ! 
public String ReceiveString(byte rst[], int maxsiz, String encoding) throws SQLException { int s = 0; try --- 243,254 ---- * @return string from back end * @exception SQLException if an I/O error occurs */ ! public String ReceiveString(byte rst[], int maxsiz, Encoding encoding) throws SQLException { + if (encoding == null) + encoding = Encoding.defaultEncoding(); + int s = 0; try *************** *** 266,282 **** } catch (IOException e) { throw new PSQLException("postgresql.stream.ioerror",e); } ! String v = null; ! if (encoding == null) ! v = new String(rst, 0, s); ! else { ! try { ! v = new String(rst, 0, s, encoding); ! } catch (UnsupportedEncodingException unse) { ! throw new PSQLException("postgresql.stream.encoding", unse); ! } ! } ! return v; } /** --- 269,275 ---- } catch (IOException e) { throw new PSQLException("postgresql.stream.ioerror",e); } ! return encoding.decode(rst, 0, s); } /** *** ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java.orig Tue Jul 10 22:09:49 2001 --- ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java Tue Jul 10 22:10:06 2001 *************** *** 195,200 **** --- 195,201 ---- suite.addTestSuite(DriverTest.class); suite.addTestSuite(ConnectionTest.class); suite.addTestSuite(DatabaseMetaDataTest.class); + suite.addTestSuite(EncodingTest.class); // Connectivity/Protocols
package org.postgresql.core; import java.io.UnsupportedEncodingException; import java.util.*; import java.sql.SQLException; import org.postgresql.util.*; /** * Converts to and from the character encoding used by the backend. * * $Id$ */ public class Encoding { private static final Encoding DEFAULT_ENCODING = new Encoding(null); /** * Preferred JVM encodings for backend encodings. */ private static final Hashtable encodings = new Hashtable(); static { encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" }); encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" }); encodings.put("LATIN1", new String[] { "ISO8859_1" }); encodings.put("LATIN2", new String[] { "ISO8859_2" }); encodings.put("LATIN3", new String[] { "ISO8859_3" }); encodings.put("LATIN4", new String[] { "ISO8859_4" }); encodings.put("LATIN5", new String[] { "ISO8859_5" }); encodings.put("LATIN6", new String[] { "ISO8859_6" }); encodings.put("LATIN7", new String[] { "ISO8859_7" }); encodings.put("LATIN8", new String[] { "ISO8859_8" }); encodings.put("LATIN9", new String[] { "ISO8859_9" }); encodings.put("EUC_JP", new String[] { "EUC_JP" }); encodings.put("EUC_CN", new String[] { "EUC_CN" }); encodings.put("EUC_KR", new String[] { "EUC_KR" }); encodings.put("EUC_TW", new String[] { "EUC_TW" }); encodings.put("WIN", new String[] { "Cp1252" }); // We prefer KOI8-U, since it is a superset of KOI8-R. encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" }); // If the database isn't encoding-aware then we can't have // any preferred encodings. encodings.put("UNKNOWN", new String[0]); } private final String encoding; public Encoding(String encoding) { this.encoding = encoding; } /** * Get an Encoding for from the given database encoding and * the encoding passed in by the user. 
*/ public static Encoding getEncoding(String databaseEncoding, String passedEncoding) { if (passedEncoding != null) { if (Encoding.isAvailable(passedEncoding)) { return new Encoding(passedEncoding); } else { return defaultEncoding(); } } else { return encodingForDatabaseEncoding(databaseEncoding); } } /** * Get an Encoding matching the given database encoding. */ private static Encoding encodingForDatabaseEncoding(String databaseEncoding) { // If the backend encoding is known and there is a suitable // encoding in the JVM we use that. Otherwise we fall back // to the default encoding of the JVM. if (encodings.containsKey(databaseEncoding)) { String[] candidates = (String[]) encodings.get(databaseEncoding); for (int i = 0; i < candidates.length; i++) { if (isAvailable(candidates[i])) { return new Encoding(candidates[i]); } } } return defaultEncoding(); } /** * Name of the (JVM) encoding used. */ public String name() { return encoding; } /** * Encode a string to an array of bytes. */ public byte[] encode(String s) throws SQLException { try { if (encoding == null) { return s.getBytes(); } else { return s.getBytes(encoding); } } catch (UnsupportedEncodingException e) { throw new PSQLException("postgresql.stream.encoding", e); } } /** * Decode an array of bytes into a string. */ public String decode(byte[] encodedString, int offset, int length) throws SQLException { try { if (encoding == null) { return new String(encodedString, offset, length); } else { return new String(encodedString, offset, length, encoding); } } catch (UnsupportedEncodingException e) { throw new PSQLException("postgresql.stream.encoding", e); } } /** * Decode an array of bytes into a string. */ public String decode(byte[] encodedString) throws SQLException { return decode(encodedString, 0, encodedString.length); } /** * Get an Encoding using the default encoding for the JVM. */ public static Encoding defaultEncoding() { return DEFAULT_ENCODING; } /** * Test if an encoding is available in the JVM. 
*/ private static boolean isAvailable(String encodingName) { try { "DUMMY".getBytes(encodingName); return true; } catch (UnsupportedEncodingException e) { return false; } } }
package org.postgresql.test.jdbc2; import junit.framework.*; import org.postgresql.core.Encoding; /** * Tests for the Encoding class. * * $Id$ */ public class EncodingTest extends TestCase { public EncodingTest(String name) { super(name); } public void testCreation() throws Exception { Encoding encoding; encoding = Encoding.getEncoding("UNICODE", null); assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase()); encoding = Encoding.getEncoding("SQL_ASCII", null); assert(encoding.name().toUpperCase().indexOf("ASCII") != -1); assertEquals("When encoding is unknown the default encoding should be used", Encoding.defaultEncoding(), Encoding.getEncoding("UNKNOWN", null)); encoding = Encoding.getEncoding("SQL_ASCII", "utf-8"); assert("Encoding passed in by the user should be preferred", encoding.name().toUpperCase().indexOf("UTF") != -1); } public void testTransformations() throws Exception { Encoding encoding = Encoding.getEncoding("UNICODE", null); assertEquals("ab", encoding.decode(new byte[] { 97, 98 })); assertEquals(2, encoding.encode("ab").length); assertEquals(97, encoding.encode("a")[0]); assertEquals(98, encoding.encode("b")[0]); encoding = Encoding.defaultEncoding(); assertEquals("a".getBytes()[0], encoding.encode("a")[0]); assertEquals(new String(new byte[] { 97 }), encoding.decode(new byte[] { 97 })); } }
---------------------------(end of broadcast)--------------------------- TIP 2: you can get off all lists at once with the unregister command (send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])