Author: olga
Date: Fri Oct 31 12:15:07 2008
New Revision: 709537

URL: http://svn.apache.org/viewvc?rev=709537&view=rev
Log:
PIG-497: UTF8 handling in BinStorage
Modified: hadoop/pig/branches/types/CHANGES.txt hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java hadoop/pig/branches/types/test/org/apache/pig/test/Util.java Modified: hadoop/pig/branches/types/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=709537&r1=709536&r2=709537&view=diff ============================================================================== --- hadoop/pig/branches/types/CHANGES.txt (original) +++ hadoop/pig/branches/types/CHANGES.txt Fri Oct 31 12:15:07 2008 @@ -303,3 +303,6 @@ PIG-507: permission error not reported (pradeepk via olgan) PIG-508: problem with double joins (pradeepk via olgan) + + PIG-497: problems with UTF8 handling in BinStorage (pradeepk via olgan) + Modified: hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java?rev=709537&r1=709536&r2=709537&view=diff ============================================================================== --- hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java (original) +++ hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java Fri Oct 31 12:15:07 2008 @@ -105,12 +105,8 @@ return new DataByteArray(ba); } - case DataType.CHARARRAY: { - int size = in.readInt(); - byte[] ba = new byte[size]; - in.readFully(ba); - return new String(ba); - } + case DataType.CHARARRAY: + return in.readUTF(); case DataType.NULL: return null; @@ -194,9 +190,7 @@ case DataType.CHARARRAY: { out.writeByte(DataType.CHARARRAY); - String s = (String)val; - out.writeInt(s.length()); - out.writeBytes(s); + out.writeUTF((String)val); break; } Modified: hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java?rev=709537&r1=709536&r2=709537&view=diff ============================================================================== --- hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java (original) +++ hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java Fri Oct 31 12:15:07 2008 @@ -693,4 +693,18 @@ assertEquals(output.second, t.get(2)); } } + + @Test + public void testUtf8Dump() throws IOException, ExecException { + + // Create input file with unicode data + File input = Util.createInputFile("tmp", "", + new String[] {"wendyξ"}); + pigServer.registerQuery("a = load 'file:" + Util.encodeEscape(input.toString()) + "' using PigStorage() " + + "as (name:chararray);"); + Iterator<Tuple> it = pigServer.openIterator("a"); + Tuple t = it.next(); + assertEquals("wendyξ", t.get(0)); + + } } Modified: hadoop/pig/branches/types/test/org/apache/pig/test/Util.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/Util.java?rev=709537&r1=709536&r2=709537&view=diff ============================================================================== --- hadoop/pig/branches/types/test/org/apache/pig/test/Util.java (original) +++ hadoop/pig/branches/types/test/org/apache/pig/test/Util.java Fri Oct 31 12:15:07 2008 @@ -18,6 +18,8 @@ package org.apache.pig.test; import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.HashMap; @@ -166,7 +168,7 @@ throws IOException { File f = File.createTempFile(tmpFilenamePrefix, tmpFilenameSuffix); f.deleteOnExit(); - PrintWriter pw = new PrintWriter(f); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8")); for (int i=0; i<inputData.length; i++){ pw.println(inputData[i]); }