gnu_java_nio_charset_iconv_IconvDecoder.c contains the following code: if (errno == EILSEQ || errno == EINVAL) retval = 1; else retval = 0;
However, errno == EINVAL is a normal condition: iconv sets it when the input ends in an incomplete multibyte sequence, so it should result in CoderResult.UNDERFLOW and not in CoderResult.MALFORMED. Because errno == EINVAL is treated as invalid input, charset conversion goes wrong in some cases, for example when a multibyte character is split across two input buffers. The following program demonstrates this bug. import gnu.java.nio.charset.iconv.*; import java.nio.*; import java.nio.charset.*; public class TestIconvCharset { public static void main(String[] args) throws Exception { int size = Integer.parseInt(args[0]); byte[] eucbytes = new byte[11 * 100]; for (int i = 0, j = 0; i < 100; i++, j+=11) { eucbytes[j] = (byte)'A'; for (int k = j+1; k < j+9; k++) { eucbytes[k] = (byte)0xa1; } eucbytes[j+9] = (byte)'B'; eucbytes[j+10] = (byte)'C'; } Charset eucjp = IconvProvider.provider().charsetForName("EUC-JP"); CharsetDecoder decoder = eucjp.newDecoder(); CharsetEncoder encoder = eucjp.newEncoder(); encoder.onMalformedInput(CodingErrorAction.REPORT); encoder.onUnmappableCharacter(CodingErrorAction.REPORT); int inpos = 0; while (inpos < eucbytes.length) { int l = eucbytes.length - inpos; if (l > size) l = size; byte[] subarray = new byte[l]; System.arraycopy(eucbytes, inpos, subarray, 0, l); ByteBuffer bbuf = ByteBuffer.wrap(subarray); CharBuffer cbuf = CharBuffer.wrap(new char[l]); boolean endInput = (inpos + l >= eucbytes.length); decoder.reset(); CoderResult cr = decoder.decode(bbuf, cbuf, endInput); System.err.println("Processed: pos=" + inpos + " " + bbuf.position() + " bytes --> " + cbuf.position() + " characters"); cbuf.flip(); check(bbuf, cbuf, encoder, inpos); if (cr.isError()) { describe(cr, bbuf, inpos); bbuf.position(bbuf.position() + cr.length()); } inpos += bbuf.position(); } } private static void check(ByteBuffer in, CharBuffer out, CharsetEncoder encoder, int inpos) { try { encoder.reset(); ByteBuffer outb = encoder.encode(out); // System.err.println("Encoded: " + out.limit() + " --> " + outb.limit()); for (int i = 0; i < in.position() || i < outb.limit(); i++) { if (i < in.position() && i < outb.limit()) { 
if (in.get(i) != outb.get(i)) { System.err.println("Changed: pos=" + (inpos+i) + " " + hex(in.get(i)) + "-->" + hex(outb.get(i))); } } else if (i >= in.position()) { System.err.println("Appeared: pos=" + (inpos+i) + " " + hex(outb.get(i))); } else { System.err.println("Lost: pos=" + (inpos+i) + " " + hex(in.get(i))); } } } catch (Exception e) { System.err.println("check: " + e); } } private static void describe(CoderResult cr, ByteBuffer in, int inpos) { try { int len = cr.length(); int pos = in.position(); for (int i = pos; i < pos + len; i++) { System.err.println(cr.toString() + ": pos = " + (inpos+i) + " " + hex(in.get(i))); } } catch (Exception e) { System.err.println("describe: " + e); } } private static String hex(byte b) { return "0x" + Integer.toHexString(b & 0xff); } } For example, $ java TestIconvCharset 600 Processed: pos=0 599 bytes --> 381 characters MALFORMED[1]: pos = 599 0xa1 Processed: pos=600 2 bytes --> 1 characters MALFORMED[1]: pos = 602 0xa1 Processed: pos=603 497 bytes --> 317 characters After deleting "errno == EINVAL", this program gives an expected result. $ java TestIconvCharset 600 Processed: pos=0 599 bytes --> 381 characters Processed: pos=599 501 bytes --> 319 characters -- Summary: gnu_java_nio_charset_iconv_IconvDecoder.c treats normal bytes as invalid Product: classpath Version: unspecified Status: UNCONFIRMED Severity: normal Priority: P2 Component: classpath AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: kaz at maczuka dot gcd dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24467 _______________________________________________ Bug-classpath mailing list Bug-classpath@gnu.org http://lists.gnu.org/mailman/listinfo/bug-classpath