On Thu, 12 Jul 2001, Bruce Momjian wrote:

> Your patch has been added to the PostgreSQL unapplied patches list at:

Here is a new version of that patch, with improvements from some feedback
I got from Barry Lind.
More of the encoding-related logic is moved into the Encoding class and
I've added some clarifying comments.

/Anders

> > With this patch I've tried to make the handling of character
> > encoding in the JDBC driver a little clearer.
> >
> > * Cleans up the logic to select a JVM encoding for a backend encoding.
> > * Makes the connection setup code easier to read.
> > * Gathers character encoding and decoding in a single place.
> > * Adds unit tests for encoding.
> > * Introduces a new class, org.postgresql.core.Encoding, and the
> > corresponding unit test class, org.postgresql.test.jdbc2.EncodingTest.


_____________________________________________________________________
A n d e r s  B e n g t s s o n                   [EMAIL PROTECTED]
Stockholm, Sweden
*** ./src/interfaces/jdbc/org/postgresql/Connection.java.orig   Tue Jul 10 22:09:49 
2001
--- ./src/interfaces/jdbc/org/postgresql/Connection.java        Wed Jul 11 23:02:58 
2001
***************
*** 8,13 ****
--- 8,14 ----
  import org.postgresql.fastpath.*;
  import org.postgresql.largeobject.*;
  import org.postgresql.util.*;
+ import org.postgresql.core.Encoding;
  
  /**
   * $Id: Connection.java,v 1.17 2001/06/07 00:09:32 momjian Exp $
***************
*** 33,43 ****
  
    /**
     *  The encoding to use for this connection.
-    *  If <b>null</b>, the encoding has not been specified by the
-    *  user, and the default encoding for the platform should be
-    *  used.
     */
!   private String encoding;
  
    public boolean CONNECTION_OK = true;
    public boolean CONNECTION_BAD = false;
--- 34,41 ----
  
    /**
     *  The encoding to use for this connection.
     */
!   private Encoding encoding = Encoding.defaultEncoding();
  
    public boolean CONNECTION_OK = true;
    public boolean CONNECTION_BAD = false;
***************
*** 168,174 ****
                // "User authentication failed"
                //
                throw new SQLException(pg_stream.ReceiveString
!                                        (receive_sbuf, 4096, getEncoding()));
  
              case 'R':
                // Get the type of request
--- 166,172 ----
                // "User authentication failed"
                //
                throw new SQLException(pg_stream.ReceiveString
!                                        (receive_sbuf, 4096, encoding));
  
              case 'R':
                // Get the type of request
***************
*** 239,245 ****
        case 'E':
        case 'N':
             throw new SQLException(pg_stream.ReceiveString
!                                   (receive_sbuf, 4096, getEncoding()));
          default:
            throw new PSQLException("postgresql.con.setup");
        }
--- 237,243 ----
        case 'E':
        case 'N':
             throw new SQLException(pg_stream.ReceiveString
!                                   (receive_sbuf, 4096, encoding));
          default:
            throw new PSQLException("postgresql.con.setup");
        }
***************
*** 251,361 ****
           break;
        case 'E':
        case 'N':
!            throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, 
getEncoding()));
          default:
            throw new PSQLException("postgresql.con.setup");
        }
  
-       // Originally we issued a SHOW DATESTYLE statement to find the databases 
default
-       // datestyle. However, this caused some problems with timestamps, so in 6.5, 
we
-       // went the way of ODBC, and set the connection to ISO.
-       //
-       // This may cause some clients to break when they assume anything other than 
ISO,
-       // but then - they should be using the proper methods ;-)
-       //
-       // We also ask the DB for certain properties (i.e. DatabaseEncoding at this 
time)
-       //
        firstWarning = null;
  
!       java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " +
!         "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else 
getdatabaseencoding() end");
  
!       String dbEncoding = null;
!       //retrieve DB properties
!       if(initrset.next()) {
! 
!         //handle DatabaseEncoding
!         dbEncoding = initrset.getString(1);
!         //convert from the PostgreSQL name to the Java name
!         if (dbEncoding.equals("SQL_ASCII")) {
!           dbEncoding = "ASCII";
!         } else if (dbEncoding.equals("UNICODE")) {
!           dbEncoding = "UTF8";
!         } else if (dbEncoding.equals("LATIN1")) {
!           dbEncoding = "ISO8859_1";
!         } else if (dbEncoding.equals("LATIN2")) {
!           dbEncoding = "ISO8859_2";
!         } else if (dbEncoding.equals("LATIN3")) {
!           dbEncoding = "ISO8859_3";
!         } else if (dbEncoding.equals("LATIN4")) {
!           dbEncoding = "ISO8859_4";
!         } else if (dbEncoding.equals("LATIN5")) {
!           dbEncoding = "ISO8859_5";
!         } else if (dbEncoding.equals("LATIN6")) {
!           dbEncoding = "ISO8859_6";
!         } else if (dbEncoding.equals("LATIN7")) {
!           dbEncoding = "ISO8859_7";
!         } else if (dbEncoding.equals("LATIN8")) {
!           dbEncoding = "ISO8859_8";
!         } else if (dbEncoding.equals("LATIN9")) {
!           dbEncoding = "ISO8859_9";
!         } else if (dbEncoding.equals("EUC_JP")) {
!           dbEncoding = "EUC_JP";
!         } else if (dbEncoding.equals("EUC_CN")) {
!           dbEncoding = "EUC_CN";
!         } else if (dbEncoding.equals("EUC_KR")) {
!           dbEncoding = "EUC_KR";
!         } else if (dbEncoding.equals("EUC_TW")) {
!           dbEncoding = "EUC_TW";
!         } else if (dbEncoding.equals("KOI8")) {
!         // try first if KOI8_U is present, it's a superset of KOI8_R
!           try {
!               dbEncoding = "KOI8_U";
!               "test".getBytes(dbEncoding);
!           }
!           catch(UnsupportedEncodingException uee) {
!           // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :(
!               dbEncoding = "KOI8_R";
!           }
  
!         } else if (dbEncoding.equals("WIN")) {
!           dbEncoding = "Cp1252";
!         } else if (dbEncoding.equals("UNKNOWN")) {
!           //This isn't a multibyte database so we don't have an encoding to use
!           //We leave dbEncoding null which will cause the default encoding for the
!           //JVM to be used
!           dbEncoding = null;
!         } else {
!           dbEncoding = null;
!         }
!       }
  
  
!       //Set the encoding for this connection
!       //Since the encoding could be specified or obtained from the DB we use the
!       //following order:
!       //  1.  passed as a property
!       //  2.  value from DB if supported by current JVM
!       //  3.  default for JVM (leave encoding null)
!       String passedEncoding = info.getProperty("charSet");  // could be null
! 
!       if (passedEncoding != null) {
!         encoding = passedEncoding;
!       } else {
!         if (dbEncoding != null) {
!           //test DB encoding
!           try {
!             "TEST".getBytes(dbEncoding);
!             //no error the encoding is supported by the current JVM
!             encoding = dbEncoding;
!           } catch (UnsupportedEncodingException uee) {
!             //dbEncoding is not supported by the current JVM
!             encoding = null;
!           }
!         } else {
!           encoding = null;
!         }
        }
  
        // Initialise object handling
        initObjectTypes();
--- 249,282 ----
           break;
        case 'E':
        case 'N':
!            throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096, 
encoding));
          default:
            throw new PSQLException("postgresql.con.setup");
        }
  
        firstWarning = null;
  
!       String dbEncoding;
  
!       // "pg_encoding_to_char(1)" will return 'EUC_JP' for a backend compiled with 
multibyte,
!       // otherwise it's hardcoded to 'SQL_ASCII'.
!       // If the backend doesn't know about multibyte we can't assume anything about 
the encoding
!       // used, so we denote this with 'UNKNOWN'.
  
!       final String encodingQuery =
!         "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else 
getdatabaseencoding() end";
  
+       // Set datestyle and fetch db encoding in a single call, to avoid making
+       // more than one round trip to the backend during connection startup.
  
!       java.sql.ResultSet resultSet =
!         ExecSQL("set datestyle to 'ISO'; " + encodingQuery);
! 
!       if (! resultSet.next()) {
!         throw new PSQLException("postgresql.con.failed", "failed getting backend 
encoding");
        }
+       dbEncoding = resultSet.getString(1);
+       encoding = Encoding.getEncoding(dbEncoding, info.getProperty("charSet"));
  
        // Initialise object handling
        initObjectTypes();
***************
*** 455,476 ****
            int insert_oid = 0;
            SQLException final_error = null;
  
!           // Commented out as the backend can now handle queries
!           // larger than 8K. Peter June 6 2000
!           //if (sql.length() > 8192)
!           //throw new PSQLException("postgresql.con.toolong",sql);
! 
!         if (getEncoding() == null)
!             buf = sql.getBytes();
!         else {
!             try {
!                 buf = sql.getBytes(getEncoding());
!             } catch (UnsupportedEncodingException unse) {
!                  throw new PSQLException("postgresql.con.encoding",
!                                         unse);
!             }
!         }
! 
            try
                {
                    pg_stream.SendChar('Q');
--- 376,382 ----
            int insert_oid = 0;
            SQLException final_error = null;
  
!           buf = encoding.encode(sql);
            try
                {
                    pg_stream.SendChar('Q');
***************
*** 491,497 ****
                        {
                        case 'A':       // Asynchronous Notify
                            pid = pg_stream.ReceiveInteger(4);
!                           msg = 
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
                            break;
                        case 'B':       // Binary Data Transfer
                            if (fields == null)
--- 397,403 ----
                        {
                        case 'A':       // Asynchronous Notify
                            pid = pg_stream.ReceiveInteger(4);
!                           msg = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
                            break;
                        case 'B':       // Binary Data Transfer
                            if (fields == null)
***************
*** 502,508 ****
                                tuples.addElement(tup);
                            break;
                        case 'C':       // Command Status
!                           recv_status = 
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
  
                                // Now handle the update count correctly.
                                if(recv_status.startsWith("INSERT") || 
recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || 
recv_status.startsWith("MOVE")) {
--- 408,414 ----
                                tuples.addElement(tup);
                            break;
                        case 'C':       // Command Status
!                           recv_status = 
pg_stream.ReceiveString(receive_sbuf,8192,encoding);
  
                                // Now handle the update count correctly.
                                if(recv_status.startsWith("INSERT") || 
recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") || 
recv_status.startsWith("MOVE")) {
***************
*** 544,550 ****
                                tuples.addElement(tup);
                            break;
                        case 'E':       // Error Message
!                           msg = 
pg_stream.ReceiveString(receive_sbuf,4096,getEncoding());
                            final_error = new SQLException(msg);
                            hfr = true;
                            break;
--- 450,456 ----
                                tuples.addElement(tup);
                            break;
                        case 'E':       // Error Message
!                           msg = pg_stream.ReceiveString(receive_sbuf,4096,encoding);
                            final_error = new SQLException(msg);
                            hfr = true;
                            break;
***************
*** 559,568 ****
                                hfr = true;
                            break;
                        case 'N':       // Error Notification
!                           
addWarning(pg_stream.ReceiveString(receive_sbuf,4096,getEncoding()));
                            break;
                        case 'P':       // Portal Name
!                           String pname = 
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
                            break;
                        case 'T':       // MetaData Field Description
                            if (fields != null)
--- 465,474 ----
                                hfr = true;
                            break;
                        case 'N':       // Error Notification
!                           
addWarning(pg_stream.ReceiveString(receive_sbuf,4096,encoding));
                            break;
                        case 'P':       // Portal Name
!                           String pname = 
pg_stream.ReceiveString(receive_sbuf,8192,encoding);
                            break;
                        case 'T':       // MetaData Field Description
                            if (fields != null)
***************
*** 595,601 ****
  
        for (i = 0 ; i < nf ; ++i)
            {
!               String typname = 
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
                int typid = pg_stream.ReceiveIntegerR(4);
                int typlen = pg_stream.ReceiveIntegerR(2);
                int typmod = pg_stream.ReceiveIntegerR(4);
--- 501,507 ----
  
        for (i = 0 ; i < nf ; ++i)
            {
!               String typname = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
                int typid = pg_stream.ReceiveIntegerR(4);
                int typlen = pg_stream.ReceiveIntegerR(2);
                int typmod = pg_stream.ReceiveIntegerR(4);
***************
*** 665,671 ****
       *  default encoding.
       */
      public String getEncoding() throws SQLException {
!         return encoding;
      }
  
      /**
--- 571,577 ----
       *  default encoding.
       */
      public String getEncoding() throws SQLException {
!         return encoding.name();
      }
  
      /**
*** ./src/interfaces/jdbc/org/postgresql/PG_Stream.java.orig    Tue Jul 10 22:09:49 
2001
--- ./src/interfaces/jdbc/org/postgresql/PG_Stream.java Tue Jul 10 22:10:06 2001
***************
*** 10,16 ****
  import org.postgresql.util.*;
  
  /**
!  * @version 1.0 15-APR-1997
   *
   * This class is used by Connection & PGlobj for communicating with the
   * backend.
--- 10,16 ----
  import org.postgresql.util.*;
  
  /**
!  * $Id$
   *
   * This class is used by Connection & PGlobj for communicating with the
   * backend.
***************
*** 211,217 ****
    public String ReceiveString(int maxsiz) throws SQLException
    {
      byte[] rst = bytePoolDim1.allocByte(maxsiz);
!     return ReceiveString(rst, maxsiz, null);
    }
  
    /**
--- 211,217 ----
    public String ReceiveString(int maxsiz) throws SQLException
    {
      byte[] rst = bytePoolDim1.allocByte(maxsiz);
!     return ReceiveString(rst, maxsiz, Encoding.defaultEncoding());
    }
  
    /**
***************
*** 225,231 ****
     * @return string from back end
     * @exception SQLException if an I/O error occurs
     */
!   public String ReceiveString(int maxsiz, String encoding) throws SQLException
    {
      byte[] rst = bytePoolDim1.allocByte(maxsiz);
      return ReceiveString(rst, maxsiz, encoding);
--- 225,231 ----
     * @return string from back end
     * @exception SQLException if an I/O error occurs
     */
!   public String ReceiveString(int maxsiz, Encoding encoding) throws SQLException
    {
      byte[] rst = bytePoolDim1.allocByte(maxsiz);
      return ReceiveString(rst, maxsiz, encoding);
***************
*** 243,251 ****
     * @return string from back end
     * @exception SQLException if an I/O error occurs
     */
!   public String ReceiveString(byte rst[], int maxsiz, String encoding)
        throws SQLException
    {
      int s = 0;
  
      try
--- 243,254 ----
     * @return string from back end
     * @exception SQLException if an I/O error occurs
     */
!   public String ReceiveString(byte rst[], int maxsiz, Encoding encoding)
        throws SQLException
    {
+     if (encoding == null)
+       encoding = Encoding.defaultEncoding();
+ 
      int s = 0;
  
      try
***************
*** 266,282 ****
        } catch (IOException e) {
        throw new PSQLException("postgresql.stream.ioerror",e);
        }
!       String v = null;
!       if (encoding == null)
!           v = new String(rst, 0, s);
!       else {
!           try {
!               v = new String(rst, 0, s, encoding);
!           } catch (UnsupportedEncodingException unse) {
!               throw new PSQLException("postgresql.stream.encoding", unse);
!           }
!       }
!       return v;
    }
  
    /**
--- 269,275 ----
        } catch (IOException e) {
        throw new PSQLException("postgresql.stream.ioerror",e);
        }
!       return encoding.decode(rst, 0, s);
    }
  
    /**
*** ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java.orig      Tue Jul 10 
22:09:49 2001
--- ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java   Tue Jul 10 22:10:06 
2001
***************
*** 195,200 ****
--- 195,201 ----
      suite.addTestSuite(DriverTest.class);
      suite.addTestSuite(ConnectionTest.class);
      suite.addTestSuite(DatabaseMetaDataTest.class);
+     suite.addTestSuite(EncodingTest.class);
  
      // Connectivity/Protocols
  
package org.postgresql.core;

import java.io.UnsupportedEncodingException;
import java.util.*;
import java.sql.SQLException;
import org.postgresql.util.*;

/**
 * Converts to and from the character encoding used by the backend.
 *
 * An instance wraps the name of a JVM character encoding. A <b>null</b>
 * name denotes the default encoding of the JVM, in which case the
 * platform-default String conversions are used.
 *
 * $Id$
 */
public class Encoding {

    /** Shared instance that uses the default encoding of the JVM. */
    private static final Encoding DEFAULT_ENCODING = new Encoding(null);

    /**
     * Preferred JVM encodings for backend encodings.
     * Maps a backend encoding name (String) to an array of candidate
     * JVM encoding names (String[]), listed in order of preference.
     */
    private static final Hashtable encodings = new Hashtable();

    static {
        encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" });
        encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" });
        encodings.put("LATIN1", new String[] { "ISO8859_1" });
        encodings.put("LATIN2", new String[] { "ISO8859_2" });
        encodings.put("LATIN3", new String[] { "ISO8859_3" });
        encodings.put("LATIN4", new String[] { "ISO8859_4" });
        encodings.put("LATIN5", new String[] { "ISO8859_5" });
        encodings.put("LATIN6", new String[] { "ISO8859_6" });
        encodings.put("LATIN7", new String[] { "ISO8859_7" });
        encodings.put("LATIN8", new String[] { "ISO8859_8" });
        encodings.put("LATIN9", new String[] { "ISO8859_9" });
        encodings.put("EUC_JP", new String[] { "EUC_JP" });
        encodings.put("EUC_CN", new String[] { "EUC_CN" });
        encodings.put("EUC_KR", new String[] { "EUC_KR" });
        encodings.put("EUC_TW", new String[] { "EUC_TW" });
        encodings.put("WIN", new String[] { "Cp1252" });
        // We prefer KOI8-U, since it is a superset of KOI8-R.
        encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" });
        // If the database isn't encoding-aware then we can't have
        // any preferred encodings.
        encodings.put("UNKNOWN", new String[0]);
    }

    /** Name of the JVM encoding, or null for the default encoding of the JVM. */
    private final String encoding;

    /**
     * Create an Encoding for the given JVM encoding name.
     *
     * @param encoding name of a JVM encoding, or <b>null</b> to use the
     *                 default encoding of the JVM. The name is not
     *                 validated here; an unsupported name is reported
     *                 when encode or decode is called.
     */
    public Encoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Get an Encoding from the given database encoding and
     * the encoding passed in by the user.
     *
     * An encoding passed in by the user takes precedence over the
     * database encoding. If the passed encoding is not available in
     * the JVM, the default encoding is returned (the database encoding
     * is not consulted in that case).
     *
     * @param databaseEncoding encoding name reported by the backend,
     *                         may be <b>null</b>
     * @param passedEncoding JVM encoding name requested by the user,
     *                       or <b>null</b> if none was given
     * @return the Encoding to use, never <b>null</b>
     */
    public static Encoding getEncoding(String databaseEncoding,
                                       String passedEncoding)
    {
        if (passedEncoding == null) {
            return encodingForDatabaseEncoding(databaseEncoding);
        }
        if (isAvailable(passedEncoding)) {
            return new Encoding(passedEncoding);
        }
        return defaultEncoding();
    }

    /**
     * Get an Encoding matching the given database encoding.
     *
     * If the backend encoding is known and there is a suitable
     * encoding in the JVM we use that. Otherwise we fall back
     * to the default encoding of the JVM.
     */
    private static Encoding encodingForDatabaseEncoding(String databaseEncoding) {
        // Hashtable does not accept null keys, so a missing backend
        // encoding must be handled before the lookup.
        if (databaseEncoding == null) {
            return defaultEncoding();
        }

        String[] candidates = (String[]) encodings.get(databaseEncoding);
        if (candidates != null) {
            // Candidates are ordered by preference; take the first
            // one that this JVM supports.
            for (int i = 0; i < candidates.length; i++) {
                if (isAvailable(candidates[i])) {
                    return new Encoding(candidates[i]);
                }
            }
        }
        return defaultEncoding();
    }

    /**
     * Name of the (JVM) encoding used.
     *
     * @return the encoding name, or <b>null</b> when the default
     *         encoding of the JVM is used
     */
    public String name() {
        return encoding;
    }

    /**
     * Encode a string to an array of bytes.
     *
     * @param s the string to encode
     * @return the bytes of the string in this encoding
     * @exception SQLException if the JVM does not support this encoding
     */
    public byte[] encode(String s) throws SQLException {
        try {
            if (encoding == null) {
                return s.getBytes();
            } else {
                return s.getBytes(encoding);
            }
        } catch (UnsupportedEncodingException e) {
            throw new PSQLException("postgresql.stream.encoding", e);
        }
    }

    /**
     * Decode part of an array of bytes into a string.
     *
     * @param encodedString buffer holding the encoded bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @return the decoded string
     * @exception SQLException if the JVM does not support this encoding
     */
    public String decode(byte[] encodedString, int offset, int length)
        throws SQLException
    {
        try {
            if (encoding == null) {
                return new String(encodedString, offset, length);
            } else {
                return new String(encodedString, offset, length, encoding);
            }
        } catch (UnsupportedEncodingException e) {
            throw new PSQLException("postgresql.stream.encoding", e);
        }
    }

    /**
     * Decode an entire array of bytes into a string.
     *
     * @param encodedString the encoded bytes
     * @return the decoded string
     * @exception SQLException if the JVM does not support this encoding
     */
    public String decode(byte[] encodedString) throws SQLException {
        return decode(encodedString, 0, encodedString.length);
    }

    /**
     * Get an Encoding using the default encoding for the JVM.
     * The same shared instance is returned on every call.
     */
    public static Encoding defaultEncoding() {
        return DEFAULT_ENCODING;
    }

    /**
     * Test if an encoding is available in the JVM.
     * Availability is probed by attempting to encode a dummy string.
     */
    private static boolean isAvailable(String encodingName) {
        try {
            "DUMMY".getBytes(encodingName);
            return true;
        } catch (UnsupportedEncodingException e) {
            return false;
        }
    }
}

package org.postgresql.test.jdbc2;

import junit.framework.*;
import org.postgresql.core.Encoding;

/**
 * Tests for the Encoding class.
 *
 * $Id$
 */


public class EncodingTest extends TestCase {

    public EncodingTest(String name) {
        super(name);
    }

    public void testCreation() throws Exception {
        Encoding encoding;
        encoding = Encoding.getEncoding("UNICODE", null);
        assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase());
        encoding = Encoding.getEncoding("SQL_ASCII", null);
        assert(encoding.name().toUpperCase().indexOf("ASCII") != -1);
        assertEquals("When encoding is unknown the default encoding should be used",
                     Encoding.defaultEncoding(),
                     Encoding.getEncoding("UNKNOWN", null));
        encoding = Encoding.getEncoding("SQL_ASCII", "utf-8");
        assert("Encoding passed in by the user should be preferred",
               encoding.name().toUpperCase().indexOf("UTF") != -1);
    }

    public void testTransformations() throws Exception {
        Encoding encoding = Encoding.getEncoding("UNICODE", null);
        assertEquals("ab", encoding.decode(new byte[] { 97, 98 }));

        assertEquals(2, encoding.encode("ab").length);
        assertEquals(97, encoding.encode("a")[0]);
        assertEquals(98, encoding.encode("b")[0]);

        encoding = Encoding.defaultEncoding();
        assertEquals("a".getBytes()[0], encoding.encode("a")[0]);
        assertEquals(new String(new byte[] { 97 }),
                     encoding.decode(new byte[] { 97 }));
    }
}

---------------------------(end of broadcast)---------------------------
TIP 2: you can get off all lists at once with the unregister command
    (send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])

Reply via email to