HIVE-11129 : Issue a warning when data is copied from UTF-8 to ISO 8859-1 (Aihua Xu via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2620ebbc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2620ebbc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2620ebbc Branch: refs/heads/beeline-cli Commit: 2620ebbc6722a31d40f8f0d1267a9e21cbe19470 Parents: 3301b92 Author: Szehon Ho <sze...@cloudera.com> Authored: Mon Jul 13 11:46:56 2015 -0700 Committer: Szehon Ho <sze...@cloudera.com> Committed: Mon Jul 13 11:46:56 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2620ebbc/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java index 3668c56..efc4c7e 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java @@ -21,6 +21,8 @@ package org.apache.hadoop.hive.serde2; import java.nio.charset.Charset; import java.util.Properties; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -34,7 +36,7 @@ import com.google.common.base.Charsets; * transform data from UTF-8 to specified charset during deserialize. 
*/ public abstract class AbstractEncodingAwareSerDe extends AbstractSerDe { - + private static final Log LOG = LogFactory.getLog(AbstractEncodingAwareSerDe.class); protected Charset charset; @Override @@ -42,6 +44,9 @@ public abstract class AbstractEncodingAwareSerDe extends AbstractSerDe { public void initialize(Configuration conf, Properties tbl) throws SerDeException { charset = Charset.forName(tbl.getProperty(serdeConstants.SERIALIZATION_ENCODING, "UTF-8")); + if (this.charset.equals(Charsets.ISO_8859_1) || this.charset.equals(Charsets.US_ASCII)) { + LOG.warn("The data may not be properly converted to target charset " + charset); + } } @Override