Thanks for the patch, Patrick. I hate to be a pain, but could you please use cvs diff and attach the result to Bugzilla? This helps us apply and track the patch. See http://jakarta.apache.org/poi/getinvolved/index.html for more information.
Regards, Glen Stampoultzis (TriNexus Pty Ltd) Fixed:+61 3 9753-6850 Mob:+61 (0)402 835 458 ICQ: 62722370 EMail: [EMAIL PROTECTED] URLs: http://jakarta.apache.org/poi, http://www.krysalis.org > Hi all > > I have made some modifications to BoundSheetRecord.java (ugly ones, > though) to support Unicode (Chinese in my case) in sheet names. Could > somebody review them, please? I am willing to modify and/or refactor the code. > > To support reading Unicode, this patch extends protected void > fillFields(byte [] data, short size, int offset) so that it interprets > the BIFF8 structure as needed. It reuses SSTDeserializer.manufactureStrings() > because that method correctly interprets the BIFF8 > structure. > > 'setSheetname' is also modified to set the field_4_compressed_unicode_flag > depending on whether the sheet name requires 16-bit encoding. > > To write out a Unicode string, public int serialize(int offset, byte [] > data) is extended. > > The code is attached below. > > Thanks > Patrick Lee > > > > > > /** > * lifted from SSTDeserializer > */ > > private void arraycopy( byte[] src, int src_position, > byte[] dst, int dst_position, > int length ) > { > System.arraycopy( src, src_position, dst, dst_position, length ); > } > > > protected void fillFields(byte [] data, short size, int offset) > { > field_1_position_of_BOF = LittleEndian.getInt(data, > 0 + offset); > field_2_option_flags = LittleEndian.getShort(data, > 4 + offset); > field_3_sheetname_length = data[ 6 + offset ]; > field_4_compressed_unicode_flag = data[ 7 + offset ]; > //field_5_sheetname = new String(data, 8 + offset, > // LittleEndian.ubyteToInt( field_3_sheetname_length)); > BinaryTree tempBT = new BinaryTree(); > SSTDeserializer deserializer; > deserializer = new SSTDeserializer( tempBT); > int length = LittleEndian.ubyteToInt( field_3_sheetname_length); > // deserializer.dump(data,0,size); > if ((field_4_compressed_unicode_flag & 0x01)==1) { > byte [] newData = new byte[length*2 +3]; > 
arraycopy(data,7+offset,newData,2,length*2+1); > LittleEndian.putShort(newData,0,(short)data[6+offset]); > // deserializer.dump(newData,0,length*2 +3); > // System.out.println("calling manufactureStrings!"); > deserializer.manufactureStrings(newData,0, (short)(length *2+3)); > // System.out.println("returned from manufactureStrings!"); > field_5_sheetname = ((UnicodeString)tempBT.get(new > Integer(0))).getString(); > > tempBT=null; > } > else { > field_5_sheetname = new String(data, 8 + offset, > LittleEndian.ubyteToInt( field_3_sheetname_length)); > } > // System.out.println("f_5_sn is "+field_5_sheetname); > } > > > > > /** > * Check if String use 16-bit encoding character > * Lifted from SSTRecord.addString > */ > > public boolean is16bitString(String string) > { > // scan for characters greater than 255 ... if any are > // present, we have to use 16-bit encoding. Otherwise, we > // can use 8-bit encoding > boolean useUTF16 = false; > int strlen = string.length(); > > for ( int j = 0; j < strlen; j++ ) > { > if ( string.charAt( j ) > 255 ) > { > useUTF16 = true; > break; > } > } > return useUTF16 ; > } > > /** > * Set the sheetname for this sheet. 
(this appears in the tabs at the > bottom) > * @param sheetname the name of the sheet > */ > > public void setSheetname(String sheetname) > { > boolean is16bit = is16bitString(sheetname); > setSheetnameLength((byte) sheetname.length() ); > setCompressedUnicodeFlag((byte ) (is16bit?1:0)); > field_5_sheetname = sheetname; > > } > > > > public int serialize(int offset, byte [] data) > { > LittleEndian.putShort(data, 0 + offset, sid); > LittleEndian.putShort(data, 2 + offset, > ( short ) (0x08 + getSheetnameLength()* > (getCompressedUnicodeFlag()==0?1:2))); > LittleEndian.putInt(data, 4 + offset, getPositionOfBof()); > LittleEndian.putShort(data, 8 + offset, getOptionFlags()); > data[ 10 + offset ] = getSheetnameLength(); > data[ 11 + offset ] = getCompressedUnicodeFlag(); > > if (getCompressedUnicodeFlag()==0){ > // we assume compressed unicode (bein the dern americans we are ; > -p) > StringUtil.putCompressedUnicode(getSheetname(), data, 12 + > offset); > } > else { > try { > StringUtil.putUncompressedUnicode(getSheetname(), data, 12 + > offset); > // String unicodeString = new String(getSheetname().getBytes > ("Unicode"),"Unicode"); > // StringUtil.putUncompressedUnicode(unicodeString, data, 12 + > offset); > } > catch (Exception e){ > System.out.println("encoding exception in > BoundSheetRecord.serialize!"); > } > > > } > return getRecordSize(); > } > > > > > > -- > To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> > For additional commands, e-mail: <mailto:[EMAIL PROTECTED]> > -- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>
