DO NOT REPLY TO THIS EMAIL, BUT PLEASE POST YOUR BUG RELATED COMMENTS THROUGH THE WEB INTERFACE AVAILABLE AT <http://nagoya.apache.org/bugzilla/show_bug.cgi?id=10548>. ANY REPLY MADE TO THIS MESSAGE WILL NOT BE COLLECTED AND INSERTED IN THE BUG DATABASE.
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=10548 [PATCH] Unicode Support for excell sheetname Summary: [PATCH] Unicode Support for excell sheetname Product: POI Version: 2.0-dev Platform: PC OS/Version: Windows NT/2K Status: NEW Severity: Enhancement Priority: Other Component: HSSF AssignedTo: [EMAIL PROTECTED] ReportedBy: [EMAIL PROTECTED] Hi all, I have made some modifications to BoundSheetRecord.java (ugly ones, though) to support Unicode (Chinese in my case) in sheet names. Could somebody review them, please? I am willing to modify and/or refactor them. To support reading Unicode, this patch extends protected void fillFields(byte [] data, short size, int offset) so that it interprets the BIFF8 structure as needed. It 'REUSEs' SSTDeserializer.manufactureStrings(), as it correctly interprets the BIFF8 structure. 'setSheetname' is also modified to set field_4_compressed_unicode_flag depending on whether the sheet name is a 16-bit-encoded string. To write out Unicode strings, public int serialize(int offset, byte []data) is extended. Attached below is the code. Thanks, Patrick Lee ? 
unicodeSheetname.patch Index: src/java/org/apache/poi/hssf/record/BoundSheetRecord.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/java/org/apache/poi/hssf/record/BoundSheetRecord.java,v retrieving revision 1.4 diff -u -r1.4 BoundSheetRecord.java --- src/java/org/apache/poi/hssf/record/BoundSheetRecord.java 1 Mar 2002 13:27:10 -0000 1.4 +++ src/java/org/apache/poi/hssf/record/BoundSheetRecord.java 8 Jul 2002 09:01:22 +-0000 @@ -54,7 +54,7 @@ */ package org.apache.poi.hssf.record; - +import org.apache.poi.util.BinaryTree; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.StringUtil; @@ -117,6 +117,16 @@ } } + /** + * lifted from SSTDeserializer + */ + + private void arraycopy( byte[] src, int src_position, + byte[] dst, int dst_position, + int length ) + { + System.arraycopy( src, src_position, dst, dst_position, length ); + } protected void fillFields(byte [] data, short size, int offset) { field_1_position_of_BOF = LittleEndian.getInt(data, @@ -125,8 +135,28 @@ 4 + offset); field_3_sheetname_length = data[ 6 + offset ]; field_4_compressed_unicode_flag = data[ 7 + offset ]; - field_5_sheetname = new String(data, 8 + offset, - LittleEndian.ubyteToInt( field_3_sheetname_length)); + //field_5_sheetname = new String(data, 8 + offset, + // LittleEndian.ubyteToInt( field_3_sheetname_length)); + BinaryTree tempBT = new BinaryTree(); + SSTDeserializer deserializer; + deserializer = new SSTDeserializer( tempBT); + int length = LittleEndian.ubyteToInt( field_3_sheetname_length); + if ((field_4_compressed_unicode_flag & 0x01)==1) { + byte [] newData = new byte[length*2 +3]; + arraycopy(data,7+offset,newData,2,length*2+1); + LittleEndian.putShort(newData,0,(short)data[6+offset]); +// System.out.println("calling manufactureStrings!"); + deserializer.manufactureStrings(newData,0, (short)(length *2+3)); +// System.out.println("returned from manufactureStrings!"); + field_5_sheetname = 
((UnicodeString)tempBT.get(new Integer(0))).getString(); + + tempBT=null; + } + else { + field_5_sheetname = new String(data, 8 + offset, + LittleEndian.ubyteToInt( field_3_sheetname_length)); + } +// System.out.println("f_5_sn is "+field_5_sheetname); } /** @@ -175,13 +205,39 @@ } /** + * Check if String use 16-bit encoding character + * Lifted from SSTRecord.addString + */ + public boolean is16bitString(String string) + { + // scan for characters greater than 255 ... if any are + // present, we have to use 16-bit encoding. Otherwise, we + // can use 8-bit encoding + boolean useUTF16 = false; + int strlen = string.length(); + + for ( int j = 0; j < strlen; j++ ) + { + if ( string.charAt( j ) > 255 ) + { + useUTF16 = true; + break; + } + } + return useUTF16 ; + } + /** * Set the sheetname for this sheet. (this appears in the tabs at the bottom) * @param sheetname the name of the sheet */ public void setSheetname(String sheetname) { + boolean is16bit = is16bitString(sheetname); + setSheetnameLength((byte) sheetname.length() ); + setCompressedUnicodeFlag((byte ) (is16bit?1:0)); field_5_sheetname = sheetname; + } /** @@ -263,20 +319,34 @@ { LittleEndian.putShort(data, 0 + offset, sid); LittleEndian.putShort(data, 2 + offset, - ( short ) (0x08 + getSheetnameLength())); + ( short ) (0x08 + getSheetnameLength()* (getCompressedUnicodeFlag()==0?1:2))); LittleEndian.putInt(data, 4 + offset, getPositionOfBof()); LittleEndian.putShort(data, 8 + offset, getOptionFlags()); data[ 10 + offset ] = getSheetnameLength(); data[ 11 + offset ] = getCompressedUnicodeFlag(); - // we assume compressed unicode (bein the dern americans we are ;-p) - StringUtil.putCompressedUnicode(getSheetname(), data, 12 + offset); + if (getCompressedUnicodeFlag()==0){ + // we assume compressed unicode (bein the dern americans we are ;-p) + StringUtil.putCompressedUnicode(getSheetname(), data, 12 + offset); + } + else { + try { + StringUtil.putUncompressedUnicode(getSheetname(), data, 12 + offset); + // 
String unicodeString = new String(getSheetname().getBytes("Unicode"),"Unicode"); + // StringUtil.putUncompressedUnicode(unicodeString, data, 12 + offset); + } + catch (Exception e){ + System.out.println("encoding exception in BoundSheetRecord.serialize!"); + } + + + } return getRecordSize(); } public int getRecordSize() { - return 12 + getSheetnameLength(); + return 12 + getSheetnameLength()* (getCompressedUnicodeFlag()==0?1:2); } public short getSid() -- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>
