On 15/03/2016 21:12, Robert Haas wrote:
> On Mon, Mar 14, 2016 at 9:25 PM, David Rowley
> <[email protected]> wrote:
>> Over in [1] James mentioned about wanting more to be able to have more
>> influence over the partial path's parallel_degree decision. At risk
>> of a discussion on that hijacking the parallel aggregate thread, I
>> thought I'd start this for anyone who would want to discuss making
>> changes to that.
>>
>> I've attached a simple C program which shows the parallel_degree which
>> will be chosen at the moment. For now it's based on the size of the
>> base relation. Perhaps that will need to be rethought later, perhaps
>> based on costs. But I just don't think it's something for 9.6.
>
> I thought about this a bit more. There are a couple of easy things we
> could do here.
>
> The 1000-page threshold could be made into a GUC.
>
> We could add a per-table reloption for parallel-degree that would
> override the calculation.
>
> Neither of those things is very smart, but they'd probably both help
> some people. If someone is able to produce a patch for either or both
> of these things *quickly*, we could possibly try to squeeze it into
> 9.6 as a cleanup of work already done.
>
I'm not too familiar with parallel planning, but I tried to implement
both in attached patch. I didn't put much effort into the
parallel_threshold GUC documentation, because I didn't really see a good
way to explain it. I'd e happy to improve it if needed. Also, to make
this parameter easier to tune for users, perhaps we could divide the
default value by 3 and use it as is in the first iteration in
create_parallel_path() ?
Also, global max_parallel_degree still needs to be at least 1 for the
per table value to be considered.
--
Julien Rouhaud
http://dalibo.com - http://dalibo.org
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6c73fb4..472f3d5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3912,6 +3912,22 @@ SELECT * FROM parent WHERE key = 2400;
</listitem>
</varlistentry>
+ <varlistentry id="guc-parallel-treshold" xreflabel="parallel-treshold">
+ <term><varname>parallel_threshold</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>parallel_threshold</> configuration
parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ The planner will decide to use parallel plans for relation that are at
+ least larger than this parameter. If you lower this setting, the
planner
+ will use more parallel workers.
+ </para>
+
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-force-parallel-mode"
xreflabel="force_parallel_mode">
<term><varname>force_parallel_mode</varname> (<type>enum</type>)
<indexterm>
diff --git a/doc/src/sgml/ref/create_table.sgml
b/doc/src/sgml/ref/create_table.sgml
index cd234db..be9db15 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -909,6 +909,15 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } |
UNLOGGED ] TABLE [ IF NOT EXI
</varlistentry>
<varlistentry>
+ <term><literal>max_parallel_degree</> (<type>integer</>)</term>
+ <listitem>
+ <para>
+ Per-table value for <xref linkend="gux-max-parallel-degree"> parameter.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><literal>autovacuum_enabled</>,
<literal>toast.autovacuum_enabled</literal> (<type>boolean</>)</term>
<listitem>
<para>
diff --git a/src/backend/access/common/reloptions.c
b/src/backend/access/common/reloptions.c
index ea0755a..0b06650 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -267,6 +267,15 @@ static relopt_int intRelOpts[] =
0, 0, 0
#endif
},
+ {
+ {
+ "max_parallel_degree",
+ "Maximum number of parallel processes per executor node
for this relation.",
+ RELOPT_KIND_HEAP,
+ AccessExclusiveLock
+ },
+ -1, 1, INT_MAX
+ },
/* list terminator */
{{NULL}}
@@ -1291,7 +1300,9 @@ default_reloptions(Datum reloptions, bool validate,
relopt_kind kind)
{"autovacuum_analyze_scale_factor", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) +offsetof(AutoVacOpts,
analyze_scale_factor)},
{"user_catalog_table", RELOPT_TYPE_BOOL,
- offsetof(StdRdOptions, user_catalog_table)}
+ offsetof(StdRdOptions, user_catalog_table)},
+ {"max_parallel_degree", RELOPT_TYPE_INT,
+ offsetof(StdRdOptions, max_parallel_degree)}
};
options = parseRelOptions(reloptions, validate, kind, &numoptions);
diff --git a/src/backend/optimizer/path/allpaths.c
b/src/backend/optimizer/path/allpaths.c
index 4f60b85..2570619 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -659,8 +659,12 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte)
static void
create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
{
- int parallel_threshold = 1000;
+ int p_threshold = parallel_threshold;
int parallel_degree = 1;
+ int p_max_degree = max_parallel_degree;
+
+ if (rel->max_parallel_degree != -1)
+ p_max_degree = rel->max_parallel_degree;
/*
* If this relation is too small to be worth a parallel scan, just
return
@@ -669,7 +673,7 @@ create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
* just for this relation, but when combined with all of its
inheritance siblings
* it may well pay off.
*/
- if (rel->pages < parallel_threshold && rel->reloptkind ==
RELOPT_BASEREL)
+ if (rel->pages < p_threshold && rel->reloptkind == RELOPT_BASEREL)
return;
/*
@@ -677,12 +681,12 @@ create_parallel_paths(PlannerInfo *root, RelOptInfo *rel)
* relation. This probably needs to be a good deal more sophisticated,
but we
* need something here for now.
*/
- while (rel->pages > parallel_threshold * 3 &&
- parallel_degree < max_parallel_degree)
+ while (rel->pages > p_threshold * 3 &&
+ parallel_degree < p_max_degree)
{
parallel_degree++;
- parallel_threshold *= 3;
- if (parallel_threshold >= PG_INT32_MAX / 3)
+ p_threshold *= 3;
+ if (p_threshold >= PG_INT32_MAX / 3)
break;
}
diff --git a/src/backend/optimizer/path/costsize.c
b/src/backend/optimizer/path/costsize.c
index 943fcde..d53e794 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -113,6 +113,7 @@ int effective_cache_size =
DEFAULT_EFFECTIVE_CACHE_SIZE;
Cost disable_cost = 1.0e10;
int max_parallel_degree = 0;
+int parallel_threshold = DEFAULT_PARALLEL_THRESHOLD;
bool enable_seqscan = true;
bool enable_indexscan = true;
diff --git a/src/backend/optimizer/util/plancat.c
b/src/backend/optimizer/util/plancat.c
index ad715bb..18389f0 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -128,6 +128,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent,
estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
&rel->pages, &rel->tuples,
&rel->allvisfrac);
+ /* Setup the per-relation max_parallel_degree */
+ rel->max_parallel_degree = RelationGetMaxParallelDegree(relation, -1);
/*
* Make list of indexes. Ignore indexes on system catalogs if told to.
* Don't bother with indexes for an inheritance parent, either.
diff --git a/src/backend/optimizer/util/relnode.c
b/src/backend/optimizer/util/relnode.c
index 20e4bf7..d1f7549 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -107,6 +107,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind
reloptkind)
rel->consider_startup = (root->tuple_fraction > 0);
rel->consider_param_startup = false; /* might get changed
later */
rel->consider_parallel = false; /* might get changed later */
+ rel->max_parallel_degree = -1; /* set up in GetRelationInfo */
rel->reltarget = create_empty_pathtarget();
rel->pathlist = NIL;
rel->ppilist = NIL;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index edcafce..cfab130 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1697,6 +1697,17 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
{
+ {"parallel_threshold", PGC_USERSET, QUERY_TUNING_OTHER,
+ gettext_noop("Sets the minimum size of a relation to
consider a"
+ "parallel plan."),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ ¶llel_threshold,
+ 1000, 1, INT_MAX,
+ NULL, NULL, NULL
+ },
+ {
{"geqo_threshold", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Sets the threshold of FROM items beyond
which GEQO is used."),
NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample
b/src/backend/utils/misc/postgresql.conf.sample
index ee3d378..5d4a499 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -315,6 +315,7 @@
#join_collapse_limit = 8 # 1 disables collapsing of explicit
# JOIN clauses
#force_parallel_mode = off
+#parallel_threshold = 8MB # minimum relation size to consider a parallel plan
#------------------------------------------------------------------------------
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6a81416..d427219 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1782,6 +1782,7 @@ psql_completion(const char *text, int start, int end)
"autovacuum_vacuum_scale_factor",
"autovacuum_vacuum_threshold",
"fillfactor",
+ "max_parallel_degree",
"log_autovacuum_min_duration",
"toast.autovacuum_enabled",
"toast.autovacuum_freeze_max_age",
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 5032696..8cbc41d 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -487,6 +487,7 @@ typedef struct RelOptInfo
bool consider_startup; /* keep
cheap-startup-cost paths? */
bool consider_param_startup; /* ditto, for parameterized
paths? */
bool consider_parallel; /* consider parallel
paths? */
+ int max_parallel_degree; /* if overloaded for
the rel */
/* default result targetlist for Paths scanning this relation */
struct PathTarget *reltarget; /* list of Vars/Exprs, cost,
width */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index fea2bb7..280f0e5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -28,6 +28,7 @@
#define DEFAULT_CPU_OPERATOR_COST 0.0025
#define DEFAULT_PARALLEL_TUPLE_COST 0.1
#define DEFAULT_PARALLEL_SETUP_COST 1000.0
+#define DEFAULT_PARALLEL_THRESHOLD 1000
#define DEFAULT_EFFECTIVE_CACHE_SIZE 524288 /* measured in pages */
@@ -55,6 +56,7 @@ extern PGDLLIMPORT double parallel_setup_cost;
extern PGDLLIMPORT int effective_cache_size;
extern Cost disable_cost;
extern int max_parallel_degree;
+extern int parallel_threshold;
extern bool enable_seqscan;
extern bool enable_indexscan;
extern bool enable_indexonlyscan;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index f2bebf2..fc66de8 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -203,6 +203,7 @@ typedef struct StdRdOptions
AutoVacOpts autovacuum; /* autovacuum-related options */
bool user_catalog_table; /* use as an additional
catalog
* relation */
+ int max_parallel_degree; /* max number of parallel
worker */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
@@ -217,6 +218,15 @@ typedef struct StdRdOptions
((StdRdOptions *) (relation)->rd_options)->fillfactor : (defaultff))
/*
+ * RelationGetMaxParallelDegree
+ * Returns the relation's max_parallel_degree. Note multiple eval
of
+ * argument!
+ */
+#define RelationGetMaxParallelDegree(relation, defaultmpd) \
+ ((relation)->rd_options ? \
+ ((StdRdOptions *) (relation)->rd_options)->max_parallel_degree :
(defaultmpd))
+
+/*
* RelationGetTargetPageUsage
* Returns the relation's desired space usage per page in bytes.
*/
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers