HAWQ-532. Optimise vseg number for copy to statement.

Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/c29d6476
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/c29d6476
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/c29d6476

Branch: refs/heads/master
Commit: c29d647658e5e4219a370b4869f68cdcd577ab65
Parents: eaaf945
Author: hzhang2 <zhanghuan...@163.com>
Authored: Fri Mar 18 10:30:20 2016 +0800
Committer: hzhang2 <zhanghuan...@163.com>
Committed: Fri Mar 18 10:42:44 2016 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbmutate.c          |  2 +-
 src/backend/commands/copy.c          | 96 ++++++++++++++++++++++++++++---
 src/backend/commands/prepare.c       |  2 +-
 src/backend/optimizer/util/relnode.c |  2 +-
 src/backend/parser/analyze.c         |  8 +--
 src/backend/postmaster/identity.c    |  2 +-
 src/backend/utils/misc/guc.c         | 12 ++--
 src/include/postmaster/identity.h    |  2 +-
 8 files changed, 103 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/cdb/cdbmutate.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c
index 6959c64..9a998da 100644
--- a/src/backend/cdb/cdbmutate.c
+++ b/src/backend/cdb/cdbmutate.c
@@ -312,7 +312,7 @@ apply_motion(PlannerInfo *root, Plan *plan, Query *query)
                                
                                        targetPolicy = 
palloc0(sizeof(GpPolicy)- sizeof(targetPolicy->attrs) + maxattrs * 
sizeof(targetPolicy->attrs[0]));
                                        targetPolicy->nattrs = 0;
-                                       targetPolicy->bucketnum = 
GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, 
GetRandomDistPartitionNum());
+                                       targetPolicy->bucketnum = 
GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, 
GetDefaultPartitionNum());
                                        targetPolicy->ptype = 
POLICYTYPE_PARTITIONED;
                                        
                                        /* Find out what the flow is 
partitioned on */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/copy.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index bcd5384..2f4cf39 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -91,6 +91,11 @@
 #include "postmaster/autovacuum.h"
 #include "cdb/dispatcher.h"
 
+/*
+ * in dbsize.c
+ */
+extern int64 calculate_relation_size(Relation rel);
+
 /* DestReceiver for COPY (SELECT) TO */
 typedef struct
 {
@@ -137,6 +142,7 @@ static void copy_in_error_callback(void *arg);
 static void CopyInitPartitioningState(EState *estate);
 static void CopyInitDataParser(CopyState cstate);
 static bool CopyCheckIsLastLine(CopyState cstate);
+static int calculate_virtual_segment_number(List* candidateOids); /* list of relation OIDs */
 
 /* ==========================================================================
  * The follwing macros aid in major refactoring of data processing code (in
@@ -1556,7 +1562,16 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
 
                        target_policy = GpPolicyFetch(CurrentMemoryContext, 
relid);
                        Assert(target_policy);
-                       target_segment_num = target_policy->bucketnum;
+                       /*
+                        * For hash table we use table bucket number to request 
vsegs
+                        * For random table, we use a fixed GUC value to 
request vsegs.
+                        */
+                       if(target_policy->nattrs > 0){
+                               target_segment_num = target_policy->bucketnum;
+                       }
+                       else{
+                               target_segment_num = 
hawq_rm_nvseg_for_copy_from_perquery;
+                       }
                        pfree(target_policy);
 
                        cstate->resource = AllocateResource(QRL_ONCE, 1, 1, 
target_segment_num, target_segment_num,NULL,0);
@@ -1749,6 +1764,63 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
 }
 
 /*
+ * Calculate the virtual segment number for a copy statement.
+ *
+ * candidateOids holds the OIDs of all relations involved in the copy.
+ * If any relation is hash distributed, the max bucket number is used;
+ * otherwise one virtual segment is requested per 128MB of relation data.
+ *
+ * The result is capped at GetQueryVsegNum(). If GpPolicyFetch() returns
+ * NULL for a relation, GetAnalyzeVSegNumLimit() is returned instead.
+ */
+static int calculate_virtual_segment_number(List* candidateOids) {
+       ListCell* le1;
+       int64 vsegNumber = 1;
+       int64 totalDataSize = 0;
+       bool isHashRelationExist = false;
+       int maxHashBucketNumber = 0;
+
+       foreach (le1, candidateOids)
+       {
+               Oid candidateOid = lfirst_oid(le1);
+               Relation rel = relation_open(candidateOid, AccessShareLock);
+               GpPolicy *targetPolicy = GpPolicyFetch(CurrentMemoryContext, candidateOid);
+
+               if (targetPolicy == NULL) {
+                       /* do not leak the open relation on the early exit */
+                       relation_close(rel, AccessShareLock);
+                       return GetAnalyzeVSegNumLimit();
+               }
+               if (targetPolicy->nattrs > 0) {
+                       isHashRelationExist = true;
+                       if (maxHashBucketNumber < targetPolicy->bucketnum) {
+                               maxHashBucketNumber = targetPolicy->bucketnum;
+                       }
+               }
+               pfree(targetPolicy);
+               /* data size only matters while no hash relation has been seen */
+               if (!isHashRelationExist) {
+                       totalDataSize += calculate_relation_size(rel);
+               }
+               relation_close(rel, AccessShareLock);
+       }
+
+       if (isHashRelationExist) {
+               vsegNumber = maxHashBucketNumber;
+       } else {
+               /* one virtual segment for each 128MB (2^27 bytes) of data */
+               vsegNumber = (totalDataSize >> 27) + 1;
+       }
+       Assert(vsegNumber > 0);
+       /* clamp while still 64 bits wide so the final narrowing is safe */
+       if (vsegNumber > GetQueryVsegNum()) {
+               vsegNumber = GetQueryVsegNum();
+       }
+
+       return (int) vsegNumber;
+}
+
+/*
  * This intermediate routine exists mainly to localize the effects of setjmp
  * so we don't need to plaster a lot of variables with "volatile".
  */
@@ -1809,13 +1881,21 @@ DoCopyTo(CopyState cstate)
                 */
                if (Gp_role == GP_ROLE_DISPATCH && cstate->rel && 
cstate->rel->rd_cdbpolicy)
                {
-                       GpPolicy *target_policy = NULL;
                        int target_segment_num = 0;
+                       /*
+                        * For a hash distributed table, use the table's bucket number;
+                        * for a random table, derive the vseg number from the table size.
+                        */
+                       PartitionNode *pn = get_parts(cstate->rel->rd_id, 0 
/*level*/ ,
+                                                                               
                        0 /*parent*/, false /* inctemplate */, 
CurrentMemoryContext, true /*includesubparts*/);
+                       Assert(pn);
+                       List            *lFullRelOids = NIL;
+                       lFullRelOids = all_leaf_partition_relids(pn);
+                       lFullRelOids = lappend_oid(lFullRelOids, 
cstate->rel->rd_id); /* root partition */
+                       lFullRelOids = list_concat(lFullRelOids, 
all_interior_partition_relids(pn)); /* interior partitions */
 
-                       target_policy = GpPolicyFetch(CurrentMemoryContext, 
cstate->rel->rd_id);
-                       Assert(target_policy);
-                       target_segment_num = target_policy->bucketnum;
-                       pfree(target_policy);
+                       target_segment_num = 
calculate_virtual_segment_number(lFullRelOids);
+                       elog(LOG, "virtual segment number of copy to is: %d\n", 
target_segment_num);
 
                        cstate->resource = AllocateResource(QRL_ONCE, 1, 1, 
target_segment_num, target_segment_num,NULL,0);
                        CopyToDispatch(cstate);
@@ -4200,7 +4280,7 @@ CopyFrom(CopyState cstate)
                                        if (cstate->oids && file_has_oids)
                                                MemTupleSetOid(tuple, 
resultRelInfo->ri_aoInsertDesc->mt_bind, loaded_oid);
                                }
-                               else if ((relstorage == RELSTORAGE_PARQUET))
+                               else if (relstorage == RELSTORAGE_PARQUET)
                                {
                     tuple = NULL;
                                }
@@ -4231,7 +4311,7 @@ CopyFrom(CopyState cstate)
                                {
                                        HeapTuple       newtuple;
 
-                                       if((relstorage == RELSTORAGE_PARQUET))
+                                       if(relstorage == RELSTORAGE_PARQUET)
                                        {
                                                Assert(!tuple);
                                                elog(ERROR, "triggers are not 
supported on tables that use column-oriented storage");

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/prepare.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 76c04e9..013af9a 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -242,7 +242,7 @@ ExecuteQuery(ExecuteStmt *stmt, const char *queryString,
                                                                        + 255 * 
sizeof(pstmt->intoPolicy->attrs[0]));
                pstmt->intoPolicy->nattrs = 0;
                pstmt->intoPolicy->ptype = POLICYTYPE_PARTITIONED;
-               pstmt->intoPolicy->bucketnum = 
GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetRandomDistPartitionNum());
+               pstmt->intoPolicy->bucketnum = 
GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetDefaultPartitionNum());
                
                MemoryContextSwitchTo(oldContext);
        }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/optimizer/util/relnode.c
----------------------------------------------------------------------
diff --git a/src/backend/optimizer/util/relnode.c 
b/src/backend/optimizer/util/relnode.c
index 9e94901..8fb676c 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -142,7 +142,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind 
reloptkind)
                        {
                                rel->cdbpolicy = (GpPolicy *) 
palloc(sizeof(GpPolicy));
                                rel->cdbpolicy->ptype = POLICYTYPE_PARTITIONED;
-                               rel->cdbpolicy->bucketnum = 
GetRandomDistPartitionNum();
+                               rel->cdbpolicy->bucketnum = 
GetDefaultPartitionNum();
                                rel->cdbpolicy->nattrs = 0;
                                rel->cdbpolicy->attrs[0] = 1;
                        }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/parser/analyze.c
----------------------------------------------------------------------
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index ad35484..f17c90a 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -2724,7 +2724,7 @@ transformETDistributedBy(ParseState *pstate, 
CreateStmtContext *cxt,
                                                                                
 sizeof(p->attrs[0]));
                        p->ptype = POLICYTYPE_PARTITIONED;
                        p->nattrs = 0;
-                       p->bucketnum = 
GetRelOpt_bucket_num_fromOptions(options, GetRandomDistPartitionNum());
+                       p->bucketnum = 
GetRelOpt_bucket_num_fromOptions(options, GetDefaultPartitionNum());
                        p->attrs[0] = 1;
                }
 
@@ -2981,7 +2981,7 @@ transformDistributedBy(ParseState *pstate, 
CreateStmtContext *cxt,
 
        if (policy->bucketnum == -1)
        {
-               policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, 
GetRandomDistPartitionNum());
+               policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, 
GetDefaultPartitionNum());
        }
 
        *policyp = policy;
@@ -12187,7 +12187,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry)
        policy->nattrs = 0;
        policy->attrs[0] = 1;
        if(stmt->intoClause != NULL)
-               policy->bucketnum = 
GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, 
GetRandomDistPartitionNum());
+               policy->bucketnum = 
GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, 
GetDefaultPartitionNum());
 
        if (stmt->distributedBy)
        {
@@ -12241,7 +12241,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry)
                }
                else
                {
-                       policy->bucketnum = 
GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, 
GetRandomDistPartitionNum());
+                       policy->bucketnum = 
GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, 
GetDefaultPartitionNum());
                }
        }
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/postmaster/identity.c
----------------------------------------------------------------------
diff --git a/src/backend/postmaster/identity.c 
b/src/backend/postmaster/identity.c
index b209ec5..36ea0cb 100644
--- a/src/backend/postmaster/identity.c
+++ b/src/backend/postmaster/identity.c
@@ -537,7 +537,7 @@ GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, 
int default_val)
 }
 
 int
-GetRandomDistPartitionNum(void)
+GetDefaultPartitionNum(void)
 {
        return default_hash_table_bucket_number;
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a6a8bee..56909c9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -4516,7 +4516,7 @@ static struct config_int ConfigureNamesInt[] =
                        NULL
                },
                &default_hash_table_bucket_number,
-               6, 1, 10000, NULL, NULL
+               6, 1, 65535, NULL, NULL
        },
 
        {
@@ -4525,7 +4525,7 @@ static struct config_int ConfigureNamesInt[] =
                        NULL
                },
                &hawq_rm_nvseg_for_copy_from_perquery,
-               6, 1, 10000, NULL, NULL
+               6, 1, 65535, NULL, NULL
        },
 
        {
@@ -4534,7 +4534,7 @@ static struct config_int ConfigureNamesInt[] =
                        NULL
                },
                &hawq_rm_nvseg_for_analyze_perquery_perseg_limit,
-               4, 1, 10000, NULL, NULL
+               4, 1, 65535, NULL, NULL
        },
 
        {
@@ -4543,7 +4543,7 @@ static struct config_int ConfigureNamesInt[] =
                        NULL
                },
                &hawq_rm_nvseg_for_analyze_perquery_limit,
-               256, 1, 10000, NULL, NULL
+               256, 1, 65535, NULL, NULL
        },
 
        {
@@ -6388,7 +6388,7 @@ static struct config_int ConfigureNamesInt[] =
                     NULL
             },
             &rm_nvseg_perquery_limit,
-            512, 1, 10000, NULL, NULL
+            512, 1, 65535, NULL, NULL
     },
 
     {
@@ -6398,7 +6398,7 @@ static struct config_int ConfigureNamesInt[] =
                     NULL
             },
             &rm_nvseg_perquery_perseg_limit,
-            6, 1, 10000, NULL, NULL
+            6, 1, 65535, NULL, NULL
     },
 
     {

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/include/postmaster/identity.h
----------------------------------------------------------------------
diff --git a/src/include/postmaster/identity.h 
b/src/include/postmaster/identity.h
index 814935e..f43ab62 100644
--- a/src/include/postmaster/identity.h
+++ b/src/include/postmaster/identity.h
@@ -91,7 +91,7 @@ extern int     GetCopyFromVSegNum(void);
 extern int GetRelOpt_bucket_num_fromOptions(List *options, int default_val);
 extern int GetRelOpt_bucket_num_fromRel(Relation relation, int default_val);
 extern int GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, int 
default_val);
-extern int GetRandomDistPartitionNum(void);
+extern int GetDefaultPartitionNum(void);
 extern int GetHashDistPartitionNum(void);
 extern int GetExternalTablePartitionNum(void);
 extern int GetAllWorkerHostNum(void);

Reply via email to