HIVE-13178: Enhance ORC Schema Evolution to handle more standard data type conversions (Matt McCline, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a16058e1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a16058e1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a16058e1 Branch: refs/heads/master Commit: a16058e10025d9f7af75c2283727c7c176b770e2 Parents: e1b0383 Author: Matt McCline <[email protected]> Authored: Tue May 3 02:35:00 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Tue May 3 02:35:00 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../test/resources/testconfiguration.properties | 4 + .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 53 - .../hive/ql/exec/vector/VectorExtractRow.java | 12 +- .../ql/exec/vector/VectorizationContext.java | 2 +- .../ql/io/orc/ConvertTreeReaderFactory.java | 3750 +++++++++++++ .../hadoop/hive/ql/io/orc/SchemaEvolution.java | 29 +- .../hive/ql/io/orc/TreeReaderFactory.java | 86 +- .../clientnegative/orc_replace_columns2.q | 5 +- .../clientnegative/orc_replace_columns2_acid.q | 5 +- .../clientnegative/orc_replace_columns3.q | 3 + .../clientnegative/orc_replace_columns3_acid.q | 3 + .../clientnegative/orc_type_promotion1.q | 7 +- .../clientnegative/orc_type_promotion1_acid.q | 7 +- .../clientnegative/orc_type_promotion2.q | 5 +- .../clientnegative/orc_type_promotion2_acid.q | 5 +- .../clientnegative/orc_type_promotion3.q | 5 +- .../clientnegative/orc_type_promotion3_acid.q | 5 +- .../clientpositive/orc_int_type_promotion.q | 4 + .../clientpositive/orc_schema_evolution.q | 2 + .../schema_evol_orc_acid_mapwork_part.q | 846 ++- .../schema_evol_orc_acid_mapwork_table.q | 804 ++- .../schema_evol_orc_acidvec_mapwork_part.q | 843 ++- .../schema_evol_orc_acidvec_mapwork_table.q | 801 ++- .../schema_evol_orc_nonvec_fetchwork_part.q | 831 ++- .../schema_evol_orc_nonvec_fetchwork_table.q | 824 ++- .../schema_evol_orc_nonvec_mapwork_part.q | 833 ++- ...a_evol_orc_nonvec_mapwork_part_all_complex.q | 162 + ...evol_orc_nonvec_mapwork_part_all_primitive.q | 481 ++ .../schema_evol_orc_nonvec_mapwork_table.q | 824 ++- .../schema_evol_orc_vec_mapwork_part.q | 831 ++- ...hema_evol_orc_vec_mapwork_part_all_complex.q | 162 + ...ma_evol_orc_vec_mapwork_part_all_primitive.q | 481 ++ .../schema_evol_orc_vec_mapwork_table.q | 819 ++- .../schema_evol_text_nonvec_mapwork_part.q | 5 +- ..._evol_text_nonvec_mapwork_part_all_complex.q | 5 +- ...vol_text_nonvec_mapwork_part_all_primitive.q | 5 +- .../schema_evol_text_nonvec_mapwork_table.q | 5 +- .../schema_evol_text_vec_mapwork_part.q | 2 +- ...ema_evol_text_vec_mapwork_part_all_complex.q | 2 +- ...a_evol_text_vec_mapwork_part_all_primitive.q | 2 +- .../schema_evol_text_vec_mapwork_table.q | 2 +- .../schema_evol_text_vecrow_mapwork_part.q | 2 +- ..._evol_text_vecrow_mapwork_part_all_complex.q | 2 +- ...vol_text_vecrow_mapwork_part_all_primitive.q | 2 +- .../schema_evol_text_vecrow_mapwork_table.q | 2 +- .../clientnegative/orc_replace_columns2.q.out | 13 +- .../orc_replace_columns2_acid.q.out | 13 +- .../clientnegative/orc_replace_columns3.q.out | 11 +- .../orc_replace_columns3_acid.q.out | 11 +- .../clientnegative/orc_type_promotion1.q.out | 13 +- .../orc_type_promotion1_acid.q.out | 13 +- .../clientnegative/orc_type_promotion2.q.out | 13 +- .../orc_type_promotion2_acid.q.out | 13 +- .../clientnegative/orc_type_promotion3.q.out | 11 +- .../clientnegative/orc_type_promotion3_acid.q | 18 + .../orc_type_promotion3_acid.q.out | 11 +- .../schema_evol_orc_acid_mapwork_part.q.out | 4319 ++++++++++++--- .../schema_evol_orc_acid_mapwork_table.q.out | 3334 ++++++++++-- .../schema_evol_orc_acidvec_mapwork_part.q.out | 4319 ++++++++++++--- .../schema_evol_orc_acidvec_mapwork_table.q.out | 3334 ++++++++++-- .../schema_evol_orc_nonvec_fetchwork_part.q.out | 4905 +++++++++++++++-- ...schema_evol_orc_nonvec_fetchwork_table.q.out | 4367 +++++++++++++++- .../schema_evol_orc_nonvec_mapwork_part.q.out | 4909 +++++++++++++++-- ...ol_orc_nonvec_mapwork_part_all_complex.q.out | 726 +++ ..._orc_nonvec_mapwork_part_all_primitive.q.out | 2872 ++++++++++ .../schema_evol_orc_nonvec_mapwork_table.q.out | 4367 +++++++++++++++- .../schema_evol_orc_vec_mapwork_part.q.out | 4929 ++++++++++++++++-- ..._evol_orc_vec_mapwork_part_all_complex.q.out | 726 +++ ...vol_orc_vec_mapwork_part_all_primitive.q.out | 2887 ++++++++++ .../schema_evol_orc_vec_mapwork_table.q.out | 4391 +++++++++++++++- .../tez/schema_evol_orc_acid_mapwork_part.q.out | 4319 ++++++++++++--- .../schema_evol_orc_acid_mapwork_table.q.out | 3334 ++++++++++-- .../schema_evol_orc_acidvec_mapwork_part.q.out | 4319 ++++++++++++--- .../schema_evol_orc_acidvec_mapwork_table.q.out | 3334 ++++++++++-- .../schema_evol_orc_nonvec_fetchwork_part.q.out | 4449 ++++++++++++++-- ...schema_evol_orc_nonvec_fetchwork_table.q.out | 3911 +++++++++++++- .../schema_evol_orc_nonvec_mapwork_part.q.out | 4453 ++++++++++++++-- ...ol_orc_nonvec_mapwork_part_all_complex.q.out | 669 +++ ..._orc_nonvec_mapwork_part_all_primitive.q.out | 2587 +++++++++ .../schema_evol_orc_nonvec_mapwork_table.q.out | 3911 +++++++++++++- .../tez/schema_evol_orc_vec_mapwork_part.q.out | 4449 ++++++++++++++-- ..._evol_orc_vec_mapwork_part_all_complex.q.out | 669 +++ ...vol_orc_vec_mapwork_part_all_primitive.q.out | 2587 +++++++++ .../tez/schema_evol_orc_vec_mapwork_table.q.out | 3911 +++++++++++++- 85 files changed, 100951 insertions(+), 10048 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index caadf2a..06a6906 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1106,7 +1106,7 @@ public class HiveConf extends Configuration { "than this threshold, it will try to convert the common join into map join"), - HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", false, + HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", true, "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."), HIVE_TRANSACTIONAL_TABLE_SCAN("hive.transactional.table.scan", false, http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index c791ede..88381aa 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -193,8 +193,12 @@ minitez.query.files.shared=acid_globallimit.q,\ schema_evol_orc_nonvec_fetchwork_part.q,\ schema_evol_orc_nonvec_fetchwork_table.q,\ schema_evol_orc_nonvec_mapwork_part.q,\ + schema_evol_orc_nonvec_mapwork_part_all_complex.q,\ + schema_evol_orc_nonvec_mapwork_part_all_primitive.q,\ schema_evol_orc_nonvec_mapwork_table.q,\ schema_evol_orc_vec_mapwork_part.q,\ + schema_evol_orc_vec_mapwork_part_all_complex.q,\ + schema_evol_orc_vec_mapwork_part_all_primitive.q,\ schema_evol_orc_vec_mapwork_table.q,\ schema_evol_text_nonvec_mapwork_part.q,\ schema_evol_text_nonvec_mapwork_part_all_complex.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 5c20caa..9887d77 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3363,12 +3363,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable { && !oldColName.equalsIgnoreCase(oldName)) { throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName); } else if (oldColName.equalsIgnoreCase(oldName)) { - // if orc table, restrict changing column types. Only integer type promotion is supported. - // smallint -> int -> bigint - if (isOrcSchemaEvolution && !isSupportedTypeChange(col.getType(), type)) { - throw new HiveException(ErrorMsg.CANNOT_CHANGE_COLUMN_TYPE, col.getType(), type, - newName); - } col.setName(newName); if (type != null && !type.trim().equals("")) { col.setType(type); @@ -3435,15 +3429,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable { if (replaceCols.size() < existingCols.size()) { throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName()); } - - for (int i = 0; i < existingCols.size(); i++) { - final String currentColType = existingCols.get(i).getType().toLowerCase().trim(); - final String newColType = replaceCols.get(i).getType().toLowerCase().trim(); - if (!isSupportedTypeChange(currentColType, newColType)) { - throw new HiveException(ErrorMsg.REPLACE_UNSUPPORTED_TYPE_CONVERSION, currentColType, - newColType, replaceCols.get(i).getName()); - } - } } sd.setCols(alterTbl.getNewCols()); } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) { @@ -3611,44 +3596,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable { return 0; } - // don't change the order of enums as ordinal values are used to check for valid type promotions - enum PromotableTypes { - SMALLINT, - INT, - BIGINT; - - static List<String> types() { - return ImmutableList.of(SMALLINT.toString().toLowerCase(), - INT.toString().toLowerCase(), BIGINT.toString().toLowerCase()); - } - } - - // for ORC, only supported type promotions are smallint -> int -> bigint. No other - // type promotions are supported at this point - private boolean isSupportedTypeChange(String currentType, String newType) { - if (currentType != null && newType != null) { - currentType = currentType.toLowerCase().trim(); - newType = newType.toLowerCase().trim(); - // no type change - if (currentType.equals(newType)) { - return true; - } - if (PromotableTypes.types().contains(currentType) - && PromotableTypes.types().contains(newType)) { - PromotableTypes pCurrentType = PromotableTypes.valueOf(currentType.toUpperCase()); - PromotableTypes pNewType = PromotableTypes.valueOf(newType.toUpperCase()); - if (pNewType.ordinal() >= pCurrentType.ordinal()) { - return true; - } else { - return false; - } - } else { - return false; - } - } - return true; - } - /** * Drop a given table or some partitions. DropTableDesc is currently used for both. * http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index c965dc8..b7b5ae8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -264,7 +264,7 @@ public class VectorExtractRow { int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - LOG.info("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); } // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. @@ -280,7 +280,7 @@ public class VectorExtractRow { int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - LOG.info("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); } int adjustedLength = StringExpr.truncate(bytes, start, length, @@ -299,7 +299,7 @@ public class VectorExtractRow { int length = bytesColVector.length[adjustedIndex]; if (bytes == null) { - LOG.info("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); } int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, @@ -343,4 +343,10 @@ public class VectorExtractRow { objects[i] = extractRowColumn(batch, batchIndex, i); } } + + private void nullBytesReadError(PrimitiveCategory primitiveCategory, int batchIndex, + int projectionColumnNum) { + throw new RuntimeException("null " + primitiveCategory.name() + + " entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 5c55011..5454ba3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -787,7 +787,7 @@ public class VectorizationContext { || arg0Type(expr).equals("float"))) { return true; } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) { - // between has 4 args here, but can be vectorized like this + // between has 4 args here, but can be vectorized like this return true; } return false;
