Repository: tajo Updated Branches: refs/heads/master 7e31a3201 -> fb4135a3b
TAJO-1000: TextDatum.asChar() is incorrect, if client charset is different. (jinho) Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fb4135a3 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fb4135a3 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fb4135a3 Branch: refs/heads/master Commit: fb4135a3bf16e61345ca2a5b6a6fea5b516e7a3e Parents: 7e31a32 Author: jhkim <[email protected]> Authored: Wed Aug 13 15:04:14 2014 +0900 Committer: jhkim <[email protected]> Committed: Wed Aug 13 15:04:14 2014 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ tajo-common/pom.xml | 15 ++++++++ .../java/org/apache/tajo/datum/TextDatum.java | 8 +++-- .../org/apache/tajo/datum/TestTextDatum.java | 36 ++++++++++++++++---- tajo-project/pom.xml | 12 +++++++ 5 files changed, 65 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 5d500a8..956db89 100644 --- a/CHANGES +++ b/CHANGES @@ -112,6 +112,9 @@ Release 0.9.0 - unreleased BUG FIXES + TAJO-1000: TextDatum.asChar() is incorrect, if client charset is different. + (jinho) + TAJO-995: HiveMetaStoreClient wrapper should retry the connection. 
(jinho) TAJO-947: ColPartitionStoreExec can cause URISyntaxException due http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-common/pom.xml b/tajo-common/pom.xml index c0f3402..da2a7d0 100644 --- a/tajo-common/pom.xml +++ b/tajo-common/pom.xml @@ -209,6 +209,21 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs <artifactId>junit</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-module-junit4</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-api-mockito</artifactId> + <scope>test</scope> + </dependency> </dependencies> <profiles> http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java index e8424b3..b642168 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java @@ -25,10 +25,14 @@ import org.apache.tajo.exception.InvalidCastException; import org.apache.tajo.exception.InvalidOperationException; import org.apache.tajo.util.MurmurHash; +import java.nio.charset.Charset; import java.util.Comparator; public class TextDatum extends Datum { + static Charset defaultCharset = Charset.forName("UTF-8"); + @Expose private final int size; + /* encoded in UTF-8 */ @Expose private final byte[] bytes; public static final TextDatum EMPTY_TEXT = new TextDatum(""); @@ -41,7 +45,7 @@ public class TextDatum extends 
Datum { } public TextDatum(String string) { - this(string.getBytes()); + this(string.getBytes(defaultCharset)); } @Override @@ -85,7 +89,7 @@ public class TextDatum extends Datum { } public String asChars() { - return new String(this.bytes); + return new String(this.bytes, defaultCharset); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java index 7feab46..bf48f78 100644 --- a/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java +++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java @@ -18,21 +18,22 @@ package org.apache.tajo.datum; -import org.junit.Test; import org.apache.tajo.common.TajoDataTypes.Type; +import org.junit.Test; +import org.powermock.reflect.Whitebox; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import java.nio.charset.Charset; + +import static org.junit.Assert.*; public class TestTextDatum { - + @Test public final void testType() { Datum d = DatumFactory.createText("12345"); assertEquals(d.type(), Type.TEXT); } - + @Test public final void testAsInt4() { Datum d = DatumFactory.createText("12345"); @@ -62,7 +63,7 @@ public class TestTextDatum { Datum d = DatumFactory.createText("12345"); assertEquals("12345", d.asChars()); } - + @Test public final void testSize() { Datum d = DatumFactory.createText("12345"); @@ -74,4 +75,25 @@ public class TestTextDatum { Datum d = DatumFactory.createText("12345"); assertArrayEquals(d.asByteArray(), d.asTextBytes()); } + + @Test + public final void testTextEncoding() { + String text = "나랏말싸미 듕귁에 달아 문자와로 서르 사맛디 아니할쎄"; + TextDatum test = new TextDatum(text); + + TextDatum fromUTF8 =
new TextDatum(text.getBytes(Charset.forName("UTF-8"))); + assertEquals(test, fromUTF8); + + Charset systemCharSet = Charset.defaultCharset(); + //hack for testing + Whitebox.setInternalState(Charset.class, "defaultCharset", Charset.forName("EUC-KR")); + assertEquals(Charset.forName("EUC-KR"), Charset.defaultCharset()); + + assertEquals(text, test.asChars()); + assertNotEquals(new String(test.asByteArray()), test.asChars()); + + //restore + Whitebox.setInternalState(Charset.class, "defaultCharset", systemCharSet); + assertEquals(systemCharSet, Charset.defaultCharset()); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-project/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml index 7c0da53..b3c1fe2 100644 --- a/tajo-project/pom.xml +++ b/tajo-project/pom.xml @@ -951,6 +951,18 @@ <scope>test</scope> </dependency> <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-module-junit4</artifactId> + <version>1.5.5</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.powermock</groupId> + <artifactId>powermock-api-mockito</artifactId> + <version>1.5.5</version> + <scope>test</scope> + </dependency> + <dependency> <groupId>io.netty</groupId> <artifactId>netty</artifactId> <version>3.6.6.Final</version>
