HIVE-16605: Enforce NOT NULL constraint (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c50ebb34 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c50ebb34 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c50ebb34 Branch: refs/heads/master Commit: c50ebb34bf140956083ba805f9a1a26a795648d0 Parents: 31207ed Author: Vineet Garg <vg...@apache.org> Authored: Wed Feb 7 11:04:30 2018 -0800 Committer: Vineet Garg <vg...@apache.org> Committed: Wed Feb 7 11:05:46 2018 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/exec/FunctionRegistry.java | 1 + .../apache/hadoop/hive/ql/metadata/Hive.java | 37 +- .../hive/ql/parse/BaseSemanticAnalyzer.java | 11 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 1199 ++-- .../hive/ql/parse/TypeCheckProcFactory.java | 2 +- .../hadoop/hive/ql/ppd/OpProcFactory.java | 6 +- .../GenericUDFEnforceNotNullConstraint.java | 76 + .../alter_notnull_constraint_violation.q | 5 + .../create_with_constraints_enable.q | 1 - .../create_with_constraints_enforced.q | 1 - .../create_with_fk_constraints_enforced.q | 3 + .../create_with_pk_constraints_enforced.q | 1 + .../create_with_unique_constraints_enforced.q | 1 + .../clientnegative/insert_into_acid_notnull.q | 9 + .../insert_into_notnull_constraint.q | 3 + .../clientnegative/insert_multi_into_notnull.q | 6 + .../insert_overwrite_notnull_constraint.q | 3 + .../clientnegative/merge_constraint_notnull.q | 17 + .../clientnegative/update_notnull_constraint.q | 9 + .../clientpositive/enforce_constraint_notnull.q | 196 + .../alter_notnull_constraint_violation.q.out | 27 + .../create_with_constraints_enable.q.out | 1 - .../create_with_constraints_enforced.q.out | 1 - .../create_with_fk_constraints_enforced.q.out | 9 + .../create_with_pk_constraints_enforced.q.out | 1 + ...reate_with_unique_constraints_enforced.q.out | 1 + .../insert_into_acid_notnull.q.out | 13 + .../insert_into_notnull_constraint.q.out | 9 + .../insert_multi_into_notnull.q.out | 17 + .../insert_overwrite_notnull_constraint.q.out | 9 + .../merge_constraint_notnull.q.out | 55 + .../update_notnull_constraint.q.out | 24 + .../llap/enforce_constraint_notnull.q.out | 6010 ++++++++++++++++++ .../results/clientpositive/show_functions.q.out | 1 + 36 files changed, 7224 insertions(+), 545 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c50ebb34/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f3e74eb..26e08e4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1580,6 +1580,9 @@ public class HiveConf extends Configuration { "not a multiple of each other, bucketed map-side join cannot be performed, and the\n" + "query will fail if hive.enforce.bucketmapjoin is set to true."), + HIVE_ENFORCE_NOT_NULL_CONSTRAINT("hive.constraint.notnull.enforce", true, + "Should \"IS NOT NULL \" constraint be enforced?"), + HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false, "Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."), HIVE_AUTO_SORTMERGE_JOIN_REDUCE("hive.auto.convert.sortmerge.join.reduce.side", true, http://git-wip-us.apache.org/repos/asf/hive/blob/c50ebb34/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 32b7551..9a76b85 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -541,6 +541,7 @@ minillaplocal.query.files=\ dynpart_sort_opt_vectorization.q,\ dynpart_sort_optimization.q,\ dynpart_sort_optimization_acid.q,\ + enforce_constraint_notnull.q,\ escape1.q,\ escape2.q,\ exchgpartition2lel.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/c50ebb34/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 76e8563..f7801bb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -214,6 +214,7 @@ public final class FunctionRegistry { system.registerUDF("rand", UDFRand.class, false); system.registerGenericUDF("abs", GenericUDFAbs.class); system.registerGenericUDF("sq_count_check", GenericUDFSQCountCheck.class); + system.registerGenericUDF("enforce_constraint", GenericUDFEnforceNotNullConstraint.class); system.registerGenericUDF("pmod", GenericUDFPosMod.class); system.registerUDF("ln", UDFLn.class, false); http://git-wip-us.apache.org/repos/asf/hive/blob/c50ebb34/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 07999e2..109f4c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -31,7 +31,17 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.nio.ByteBuffer; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -40,6 +50,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; +import java.util.Set; import java.util.stream.Collectors; import javax.jdo.JDODataStoreException; @@ -4810,6 +4821,30 @@ private void constructOneLBLocationMap(FileStatus fSta, return getNotNullConstraints(dbName, tblName, true); } + /** + * Get not null constraints associated with the table that are enabled/enforced. + * + * @param dbName Database Name + * @param tblName Table Name + * @return Not null constraints associated with the table. + * @throws HiveException + */ + public NotNullConstraint getEnabledNotNullConstraints(String dbName, String tblName) + throws HiveException { + try { + List<SQLNotNullConstraint> notNullConstraints = getMSC().getNotNullConstraints( + new NotNullConstraintsRequest(dbName, tblName)); + if (notNullConstraints != null && !notNullConstraints.isEmpty()) { + notNullConstraints = notNullConstraints.stream() + .filter(nnc -> nnc.isEnable_cstr()) + .collect(Collectors.toList()); + } + return new NotNullConstraint(notNullConstraints, tblName, dbName); + } catch (Exception e) { + throw new HiveException(e); + } + } + private NotNullConstraint getNotNullConstraints(String dbName, String tblName, boolean onlyReliable) throws HiveException { try { http://git-wip-us.apache.org/repos/asf/hive/blob/c50ebb34/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 372cfad..e553a81 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -778,8 +778,10 @@ public abstract class BaseSemanticAnalyzer { constraintName = unescapeIdentifier(grandChild.getChild(0).getText().toLowerCase()); } else if (type == HiveParser.TOK_ENABLE) { enable = true; - // validate is true by default if we enable the constraint - validate = true; + // validate is false by default if we enable the constraint + // TODO: A constraint like NOT NULL could be enabled using ALTER but VALIDATE remains + // false in such cases. Ideally VALIDATE should be set to true to validate existing data + validate = false; } else if (type == HiveParser.TOK_DISABLE) { enable = false; // validate is false by default if we disable the constraint @@ -792,7 +794,10 @@ public abstract class BaseSemanticAnalyzer { rely = true; } } - if (enable) { + + // NOT NULL constraint could be enforced/enabled + if (child.getToken().getType() != HiveParser.TOK_NOT_NULL + && enable) { throw new SemanticException( ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("ENABLE/ENFORCED feature not supported yet. " + "Please use DISABLE/NOT ENFORCED instead."));