Thanks for the patch, Patrick.  Hate to be a pain, but could you please use
cvs diff and attach the result to Bugzilla?  This helps us apply and track
the patch.  See http://jakarta.apache.org/poi/getinvolved/index.html for
more information.

Regards,

Glen Stampoultzis  (TriNexus Pty Ltd)
Fixed:+61 3 9753-6850     Mob:+61 (0)402 835 458
ICQ:  62722370    EMail:  [EMAIL PROTECTED]
URL's:  http://jakarta.apache.org/poi, http://www.krysalis.org


> Hi all
>
> I have made some modifications to BoundSheetRecord.java (ugly ones,
> though) to support Unicode (Chinese in my case) in sheet names.  Could
> somebody review them, please?  I am willing to modify and/or refactor them.
>
> To support reading Unicode, this patch extends  protected void
> fillFields(byte [] data, short size, int offset) so that it interprets
> the BIFF8 structure as needed.  It reuses
> SSTDeserializer.manufactureStrings(), as that method correctly interprets
> the BIFF8 structure.
>
> 'setSheetname' is also modified to set field_4_compressed_unicode_flag
> depending on whether the sheet name contains characters that require
> 16-bit encoding.
>
> To write out Unicode strings,  public int serialize(int offset, byte []
> data) is extended.
>
> Attached below is the code.
>
> Thanks
> Patrick Lee
>
>
>
>
>
>     /**
>      *  lifted from SSTDeserializer
>      */
>
>     private void arraycopy( byte[] src, int src_position,
>                             byte[] dst, int dst_position,
>                             int length )
>     {
>         System.arraycopy( src, src_position, dst, dst_position, length );
>     }
>
>
>     protected void fillFields(byte [] data, short size, int offset)
>     {
>         field_1_position_of_BOF         = LittleEndian.getInt(data,
>                 0 + offset);
>         field_2_option_flags            = LittleEndian.getShort(data,
>                 4 + offset);
>         field_3_sheetname_length        = data[ 6 + offset ];
>         field_4_compressed_unicode_flag = data[ 7 + offset ];
>        //field_5_sheetname               = new String(data, 8 + offset,
>         //        LittleEndian.ubyteToInt( field_3_sheetname_length));
>         BinaryTree tempBT = new BinaryTree();
>         SSTDeserializer deserializer;
>         deserializer = new SSTDeserializer(        tempBT);
>         int length = LittleEndian.ubyteToInt( field_3_sheetname_length);
> //        deserializer.dump(data,0,size);
>         if ((field_4_compressed_unicode_flag & 0x01)==1) {
>           byte [] newData = new byte[length*2 +3];
>           arraycopy(data,7+offset,newData,2,length*2+1);
>           LittleEndian.putShort(newData,0,(short)data[6+offset]);
> //          deserializer.dump(newData,0,length*2 +3);
> //          System.out.println("calling manufactureStrings!");
>           deserializer.manufactureStrings(newData,0, (short)(length
*2+3));
> //          System.out.println("returned from manufactureStrings!");
>           field_5_sheetname = ((UnicodeString)tempBT.get(new
> Integer(0))).getString();
>
>           tempBT=null;
>         }
>         else {
>           field_5_sheetname =   new String(data, 8 + offset,
>               LittleEndian.ubyteToInt( field_3_sheetname_length));
>         }
> //        System.out.println("f_5_sn is "+field_5_sheetname);
>     }
>
>
>
>
>     /**
>      * Check if String use 16-bit encoding character
>      * Lifted from SSTRecord.addString
>      */
>
>     public boolean is16bitString(String string)
>     {
>             // scan for characters greater than 255 ... if any are
>             // present, we have to use 16-bit encoding. Otherwise, we
>             // can use 8-bit encoding
>             boolean useUTF16 = false;
>             int strlen = string.length();
>
>             for ( int j = 0; j < strlen; j++ )
>             {
>                 if ( string.charAt( j ) > 255 )
>                 {
>                     useUTF16 = true;
>                     break;
>                 }
>             }
>             return useUTF16 ;
>    }
>
>     /**
>      * Set the sheetname for this sheet.  (this appears in the tabs at the
> bottom)
>      * @param sheetname the name of the sheet
>      */
>
>     public void setSheetname(String sheetname)
>     {
>         boolean is16bit = is16bitString(sheetname);
>         setSheetnameLength((byte) sheetname.length() );
>         setCompressedUnicodeFlag((byte ) (is16bit?1:0));
>         field_5_sheetname = sheetname;
>
>     }
>
>
>
>     public int serialize(int offset, byte [] data)
>     {
>         LittleEndian.putShort(data, 0 + offset, sid);
>         LittleEndian.putShort(data, 2 + offset,
>                               ( short ) (0x08 + getSheetnameLength()*
> (getCompressedUnicodeFlag()==0?1:2)));
>         LittleEndian.putInt(data, 4 + offset, getPositionOfBof());
>         LittleEndian.putShort(data, 8 + offset, getOptionFlags());
>         data[ 10 + offset ] = getSheetnameLength();
>         data[ 11 + offset ] = getCompressedUnicodeFlag();
>
>         if (getCompressedUnicodeFlag()==0){
>           // we assume compressed unicode (bein the dern americans we are
;
> -p)
>           StringUtil.putCompressedUnicode(getSheetname(), data, 12 +
> offset);
>         }
>         else {
>           try {
>             StringUtil.putUncompressedUnicode(getSheetname(), data, 12 +
> offset);
>   //          String unicodeString = new String(getSheetname().getBytes
> ("Unicode"),"Unicode");
>   //          StringUtil.putUncompressedUnicode(unicodeString, data, 12 +
> offset);
>           }
>           catch (Exception e){
>             System.out.println("encoding exception in
> BoundSheetRecord.serialize!");
>           }
>
>
>         }
>         return getRecordSize();
>     }
>
>
>
>
>
> --
> To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>
>



--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to