Repository: incubator-hawq
Updated Branches:
  refs/heads/master 7920ad4ec -> a8da53f40


HAWQ-999. Report an error when the file count is not a multiple of the bucket
number of a hash table.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/a8da53f4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/a8da53f4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/a8da53f4

Branch: refs/heads/master
Commit: a8da53f40f07b25e74640f66ef1e0123ee74c5cf
Parents: 7920ad4
Author: hzhang2 <zhanghuan...@163.com>
Authored: Tue Aug 16 10:54:48 2016 +0800
Committer: hzhang2 <zhanghuan...@163.com>
Committed: Tue Aug 16 10:54:48 2016 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbdatalocality.c | 26 +++++++++++++++++++++++---
 src/backend/utils/misc/guc.c      | 12 ++++++++++++
 src/include/utils/guc.h           |  1 +
 3 files changed, 36 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a8da53f4/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c 
b/src/backend/cdb/cdbdatalocality.c
index aaa4c8d..3223a37 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -3791,12 +3791,27 @@ run_allocation_algorithm(SplitAllocResult *result, List 
*virtual_segments, Query
                targetPolicy = GpPolicyFetch(CurrentMemoryContext, myrelid);
                bool isRelationHash = is_relation_hash(targetPolicy);
 
+               int fileCountInRelation = list_length(rel_data->files);
+               bool FileCountBucketNumMismatch = false;
+               if (targetPolicy->bucketnum > 0) {
+                 FileCountBucketNumMismatch = fileCountInRelation %
+                   targetPolicy->bucketnum == 0 ? false : true;
+               }
+               if (FileCountBucketNumMismatch && 
!allow_file_count_bucket_num_mismatch) {
+                 elog(ERROR, "file count %d in catalog is not in proportion to 
the bucket "
+                     "number %d of hash table with oid=%u, some data may be 
lost, if you "
+                     "still want to continue the query by considering the 
table as random, set GUC "
+                     "allow_file_count_bucket_num_mismatch to on and try 
again.",
+                     fileCountInRelation, targetPolicy->bucketnum, myrelid);
+               }
                /* change the virtual segment order when keep hash.
                 * order of idMap should also be changed.
+                * if file count of the table is not equal to or multiple of
+                * bucket number, we should process it as random table.
                 */
                if (isRelationHash && context->keep_hash
                                && assignment_context.virtual_segment_num == 
targetPolicy->bucketnum
-                               && !vSegOrderChanged) {
+                               && !vSegOrderChanged && 
!FileCountBucketNumMismatch) {
                        change_hash_virtual_segments_order(resourcePtr, 
rel_data,
                                        &assignment_context, &idMap);
                        for (int p = 0; p < idMap.target_segment_num; p++) {
@@ -3822,8 +3837,13 @@ run_allocation_algorithm(SplitAllocResult *result, List 
*virtual_segments, Query
                uint64_t before_run_allocate_hash_or_random = 
gettime_microsec();
                /*allocate hash relation*/
                if (isRelationHash) {
-                       if (context->keep_hash && 
assignment_context.virtual_segment_num
-                                               == targetPolicy->bucketnum) {
+                 /*
+                  * if file count of the table is not equal to or multiple of
+                  * bucket number, we should process it as random table.
+                  */
+                       if (context->keep_hash
+                           && assignment_context.virtual_segment_num== 
targetPolicy->bucketnum
+                           && !FileCountBucketNumMismatch) {
                                ListCell* parlc;
                                bool parentIsHashExist=false;
                                bool parentIsHash =false;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a8da53f4/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 7300fee..2cdcb62 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -450,6 +450,7 @@ char       *Debug_dtm_action_protocol_str;
 
 /* Enable check for compatibility of encoding and locale in createdb */
 bool           gp_encoding_check_locale_compatibility;
+bool  allow_file_count_bucket_num_mismatch;
 
 char      *pgstat_temp_directory;
 
@@ -3176,6 +3177,17 @@ static struct config_bool ConfigureNamesBool[] =
        },
 
        {
+           {"allow_file_count_bucket_num_mismatch", PGC_POSTMASTER, 
CLIENT_CONN_LOCALE,
+                 gettext_noop("allow hash table to be treated as random when 
file count and"
+                     " bucket number are mismatched"),
+                 NULL,
+                 GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
+           },
+           &allow_file_count_bucket_num_mismatch,
+           false, NULL, NULL
+       },
+
+       {
                {"gp_temporary_files_filespace_repair", PGC_SUSET, 
DEVELOPER_OPTIONS,
                         gettext_noop("Change the filespace inconsistency to a 
warning"),
                         NULL,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a8da53f4/src/include/utils/guc.h
----------------------------------------------------------------------
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 67183af..41f7056 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -310,6 +310,7 @@ extern int 
hawq_rm_nvseg_for_analyze_nopart_perquery_perseg_limit;
 extern int hawq_rm_nvseg_for_analyze_part_perquery_perseg_limit;
 extern int hawq_rm_nvseg_for_analyze_nopart_perquery_limit;
 extern int hawq_rm_nvseg_for_analyze_part_perquery_limit;
+extern bool allow_file_count_bucket_num_mismatch;
 
 extern char *ConfigFileName;
 extern char *HbaFileName;

Reply via email to