HAWQ-532. Optimise virtual segment (vseg) number for COPY TO statement.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/c29d6476 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/c29d6476 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/c29d6476 Branch: refs/heads/master Commit: c29d647658e5e4219a370b4869f68cdcd577ab65 Parents: eaaf945 Author: hzhang2 <zhanghuan...@163.com> Authored: Fri Mar 18 10:30:20 2016 +0800 Committer: hzhang2 <zhanghuan...@163.com> Committed: Fri Mar 18 10:42:44 2016 +0800 ---------------------------------------------------------------------- src/backend/cdb/cdbmutate.c | 2 +- src/backend/commands/copy.c | 96 ++++++++++++++++++++++++++++--- src/backend/commands/prepare.c | 2 +- src/backend/optimizer/util/relnode.c | 2 +- src/backend/parser/analyze.c | 8 +-- src/backend/postmaster/identity.c | 2 +- src/backend/utils/misc/guc.c | 12 ++-- src/include/postmaster/identity.h | 2 +- 8 files changed, 103 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/cdb/cdbmutate.c ---------------------------------------------------------------------- diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c index 6959c64..9a998da 100644 --- a/src/backend/cdb/cdbmutate.c +++ b/src/backend/cdb/cdbmutate.c @@ -312,7 +312,7 @@ apply_motion(PlannerInfo *root, Plan *plan, Query *query) targetPolicy = palloc0(sizeof(GpPolicy)- sizeof(targetPolicy->attrs) + maxattrs * sizeof(targetPolicy->attrs[0])); targetPolicy->nattrs = 0; - targetPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, GetRandomDistPartitionNum()); + targetPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, GetDefaultPartitionNum()); targetPolicy->ptype = POLICYTYPE_PARTITIONED; /* Find out what the flow is partitioned on */ 
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/copy.c ---------------------------------------------------------------------- diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index bcd5384..2f4cf39 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -91,6 +91,11 @@ #include "postmaster/autovacuum.h" #include "cdb/dispatcher.h" +/* + * in dbsize.c + */ +extern int64 calculate_relation_size(Relation rel); + /* DestReceiver for COPY (SELECT) TO */ typedef struct { @@ -137,6 +142,7 @@ static void copy_in_error_callback(void *arg); static void CopyInitPartitioningState(EState *estate); static void CopyInitDataParser(CopyState cstate); static bool CopyCheckIsLastLine(CopyState cstate); +static int calculate_virtual_segment_number(List* candidateRelations); /* ========================================================================== * The follwing macros aid in major refactoring of data processing code (in @@ -1556,7 +1562,16 @@ DoCopy(const CopyStmt *stmt, const char *queryString) target_policy = GpPolicyFetch(CurrentMemoryContext, relid); Assert(target_policy); - target_segment_num = target_policy->bucketnum; + /* + * For hash table we use table bucket number to request vsegs + * For random table, we use a fixed GUC value to request vsegs. + */ + if(target_policy->nattrs > 0){ + target_segment_num = target_policy->bucketnum; + } + else{ + target_segment_num = hawq_rm_nvseg_for_copy_from_perquery; + } pfree(target_policy); cstate->resource = AllocateResource(QRL_ONCE, 1, 1, target_segment_num, target_segment_num,NULL,0); @@ -1749,6 +1764,63 @@ DoCopy(const CopyStmt *stmt, const char *queryString) } /* + * calculate virtual segment number for copy statement. + * if there is hash distributed relations exist, use the max bucket number. + * if all relation are random, use the data size to determine vseg number. 
+ */ +static int calculate_virtual_segment_number(List* candidateOids) { + ListCell* le1; + int vsegNumber = 1; + int64 totalDataSize = 0; + bool isHashRelationExist = false; + int maxHashBucketNumber = 0; + + foreach (le1, candidateOids) + { + Oid candidateOid = InvalidOid; + candidateOid = lfirst_oid(le1); + + //Relation rel = (Relation)lfirst(le1); + Relation rel = relation_open(candidateOid, AccessShareLock); + if (candidateOid > 0 ) { + GpPolicy *targetPolicy = GpPolicyFetch(CurrentMemoryContext, + candidateOid); + if(targetPolicy == NULL){ + return GetAnalyzeVSegNumLimit(); + } + if (targetPolicy->nattrs > 0) { + isHashRelationExist = true; + if(maxHashBucketNumber < targetPolicy->bucketnum){ + maxHashBucketNumber = targetPolicy->bucketnum; + } + } + /* + * if no hash relation, we calculate the data size of all the relations. + */ + if (!isHashRelationExist) { + totalDataSize += calculate_relation_size(rel); + } + } + relation_close(rel, AccessShareLock); + } + + if (isHashRelationExist) { + vsegNumber = maxHashBucketNumber; + } else { + /*we allocate one virtual segment for each 128M data */ + totalDataSize >>= 27; + vsegNumber = totalDataSize + 1; + } + Assert(vsegNumber > 0); + /*vsegNumber should be less than GetUtilPartitionNum*/ + if(vsegNumber > GetQueryVsegNum()){ + vsegNumber = GetQueryVsegNum(); + } + + return vsegNumber; +} + +/* * This intermediate routine exists mainly to localize the effects of setjmp * so we don't need to plaster a lot of variables with "volatile". */ @@ -1809,13 +1881,21 @@ DoCopyTo(CopyState cstate) */ if (Gp_role == GP_ROLE_DISPATCH && cstate->rel && cstate->rel->rd_cdbpolicy) { - GpPolicy *target_policy = NULL; int target_segment_num = 0; + /* + * copy hash table use table bucket number + * copy random table use table size. 
+ */ + PartitionNode *pn = get_parts(cstate->rel->rd_id, 0 /*level*/ , + 0 /*parent*/, false /* inctemplate */, CurrentMemoryContext, true /*includesubparts*/); + Assert(pn); + List *lFullRelOids = NIL; + lFullRelOids = all_leaf_partition_relids(pn); + lFullRelOids = lappend_oid(lFullRelOids, cstate->rel->rd_id); /* root partition */ + lFullRelOids = list_concat(lFullRelOids, all_interior_partition_relids(pn)); /* interior partitions */ - target_policy = GpPolicyFetch(CurrentMemoryContext, cstate->rel->rd_id); - Assert(target_policy); - target_segment_num = target_policy->bucketnum; - pfree(target_policy); + target_segment_num = calculate_virtual_segment_number(lFullRelOids); + elog(LOG, "virtual segment number of copy to is: %d\n", target_segment_num); cstate->resource = AllocateResource(QRL_ONCE, 1, 1, target_segment_num, target_segment_num,NULL,0); CopyToDispatch(cstate); @@ -4200,7 +4280,7 @@ CopyFrom(CopyState cstate) if (cstate->oids && file_has_oids) MemTupleSetOid(tuple, resultRelInfo->ri_aoInsertDesc->mt_bind, loaded_oid); } - else if ((relstorage == RELSTORAGE_PARQUET)) + else if (relstorage == RELSTORAGE_PARQUET) { tuple = NULL; } @@ -4231,7 +4311,7 @@ CopyFrom(CopyState cstate) { HeapTuple newtuple; - if((relstorage == RELSTORAGE_PARQUET)) + if(relstorage == RELSTORAGE_PARQUET) { Assert(!tuple); elog(ERROR, "triggers are not supported on tables that use column-oriented storage"); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/prepare.c ---------------------------------------------------------------------- diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 76c04e9..013af9a 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -242,7 +242,7 @@ ExecuteQuery(ExecuteStmt *stmt, const char *queryString, + 255 * sizeof(pstmt->intoPolicy->attrs[0])); pstmt->intoPolicy->nattrs = 0; pstmt->intoPolicy->ptype = POLICYTYPE_PARTITIONED; - 
pstmt->intoPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetRandomDistPartitionNum()); + pstmt->intoPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetDefaultPartitionNum()); MemoryContextSwitchTo(oldContext); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/optimizer/util/relnode.c ---------------------------------------------------------------------- diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 9e94901..8fb676c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -142,7 +142,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) { rel->cdbpolicy = (GpPolicy *) palloc(sizeof(GpPolicy)); rel->cdbpolicy->ptype = POLICYTYPE_PARTITIONED; - rel->cdbpolicy->bucketnum = GetRandomDistPartitionNum(); + rel->cdbpolicy->bucketnum = GetDefaultPartitionNum(); rel->cdbpolicy->nattrs = 0; rel->cdbpolicy->attrs[0] = 1; } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/parser/analyze.c ---------------------------------------------------------------------- diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index ad35484..f17c90a 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -2724,7 +2724,7 @@ transformETDistributedBy(ParseState *pstate, CreateStmtContext *cxt, sizeof(p->attrs[0])); p->ptype = POLICYTYPE_PARTITIONED; p->nattrs = 0; - p->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetRandomDistPartitionNum()); + p->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetDefaultPartitionNum()); p->attrs[0] = 1; } @@ -2981,7 +2981,7 @@ transformDistributedBy(ParseState *pstate, CreateStmtContext *cxt, if (policy->bucketnum == -1) { - policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetRandomDistPartitionNum()); + policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, 
GetDefaultPartitionNum()); } *policyp = policy; @@ -12187,7 +12187,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry) policy->nattrs = 0; policy->attrs[0] = 1; if(stmt->intoClause != NULL) - policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetRandomDistPartitionNum()); + policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetDefaultPartitionNum()); if (stmt->distributedBy) { @@ -12241,7 +12241,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry) } else { - policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetRandomDistPartitionNum()); + policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetDefaultPartitionNum()); } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/postmaster/identity.c ---------------------------------------------------------------------- diff --git a/src/backend/postmaster/identity.c b/src/backend/postmaster/identity.c index b209ec5..36ea0cb 100644 --- a/src/backend/postmaster/identity.c +++ b/src/backend/postmaster/identity.c @@ -537,7 +537,7 @@ GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, int default_val) } int -GetRandomDistPartitionNum(void) +GetDefaultPartitionNum(void) { return default_hash_table_bucket_number; } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/utils/misc/guc.c ---------------------------------------------------------------------- diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a6a8bee..56909c9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4516,7 +4516,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &default_hash_table_bucket_number, - 6, 1, 10000, NULL, NULL + 6, 1, 65535, NULL, NULL }, { @@ -4525,7 +4525,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &hawq_rm_nvseg_for_copy_from_perquery, - 6, 1, 10000, NULL, NULL + 6, 1, 
65535, NULL, NULL }, { @@ -4534,7 +4534,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &hawq_rm_nvseg_for_analyze_perquery_perseg_limit, - 4, 1, 10000, NULL, NULL + 4, 1, 65535, NULL, NULL }, { @@ -4543,7 +4543,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &hawq_rm_nvseg_for_analyze_perquery_limit, - 256, 1, 10000, NULL, NULL + 256, 1, 65535, NULL, NULL }, { @@ -6388,7 +6388,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &rm_nvseg_perquery_limit, - 512, 1, 10000, NULL, NULL + 512, 1, 65535, NULL, NULL }, { @@ -6398,7 +6398,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &rm_nvseg_perquery_perseg_limit, - 6, 1, 10000, NULL, NULL + 6, 1, 65535, NULL, NULL }, { http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/include/postmaster/identity.h ---------------------------------------------------------------------- diff --git a/src/include/postmaster/identity.h b/src/include/postmaster/identity.h index 814935e..f43ab62 100644 --- a/src/include/postmaster/identity.h +++ b/src/include/postmaster/identity.h @@ -91,7 +91,7 @@ extern int GetCopyFromVSegNum(void); extern int GetRelOpt_bucket_num_fromOptions(List *options, int default_val); extern int GetRelOpt_bucket_num_fromRel(Relation relation, int default_val); extern int GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, int default_val); -extern int GetRandomDistPartitionNum(void); +extern int GetDefaultPartitionNum(void); extern int GetHashDistPartitionNum(void); extern int GetExternalTablePartitionNum(void); extern int GetAllWorkerHostNum(void);