On Wed, Sep 1, 2010 at 8:46 AM, Robert Muir <rcm...@gmail.com> wrote:
>
> On Wed, Sep 1, 2010 at 5:43 AM, Deven You <devyo...@gmail.com> wrote:
>
>> I have run the test on Linux, and got the same error. Seems it is due to
>> our
>> UTF-8 decoder. I will do more debugging to narrow down the root cause. Any
>> one is familiar with UTF-8? I hope I can get some help.
>>
>>
> Here's a patch:

--- UTF_8.java.orig 2010-09-01 08:38:54.035000000 -0400
+++ UTF_8.java 2010-09-01 08:55:28.004000000 -0400
@@ -157,8 +157,17 @@
                         }
                         inIndex += tail;
                     }
-                    cArr[outIndex++] = (char) jchar;
-                    outRemaining--;
+                    if (jchar <= 0xffff) {
+                        cArr[outIndex++] = (char) jchar;
+                        outRemaining--;
+                    } else {
+                        final int chHalf = jchar - 0x10000;
+                        cArr[outIndex++] = (char) ((chHalf >> 0xA) + 0xD800);
+                        outRemaining--;
+                        if (outRemaining == 0) return CoderResult.OVERFLOW;
+                        cArr[outIndex++] = (char) ((chHalf & 0x3FF) + 0xDC00);
+                        outRemaining--;
+                    }
                 }
                 in.position(inIndex - in.arrayOffset());
                 out.position(outIndex - out.arrayOffset());
@@ -199,8 +208,17 @@
                         pos += tail;
                     }
                     pos++;
-                    out.put((char) jchar);
-                    outRemaining--;
+                    if (jchar <= 0xffff) {
+                        out.put((char) jchar);
+                        outRemaining--;
+                    } else {
+                        final int chHalf = jchar - 0x10000;
+                        out.put((char) ((chHalf >> 0xA) + 0xD800));
+                        outRemaining--;
+                        if (outRemaining == 0) return CoderResult.OVERFLOW;
+                        out.put((char) ((chHalf & 0x3FF) + 0xDC00));
+                        outRemaining--;
+                    }
                 }
                 return CoderResult.UNDERFLOW;
             } finally {

--
Robert Muir
rcm...@gmail.com