On Mon, Nov 27, 2023 at 3:05 PM jian he <[email protected]> wrote:
>
> Hi.
> Since both array_op_test, arrest both are not dropped at the end of
> src/test/regress/sql/arrays.sql.
> I found using table array_op_test test more convincing.
>
> select
> reltuples * 10 as original,
> reltuples * (select
> floor(elem_count_histogram[array_length(elem_count_histogram,1)])
> from pg_stats
> where tablename = 'array_op_test' and attname = 'i')
> as with_patch
> ,(select (elem_count_histogram[array_length(elem_count_histogram,1)])
> from pg_stats
> where tablename = 'array_op_test' and attname = 'i')
> as elem_count_histogram_last_element
> from pg_class where relname = 'array_op_test';
> original | with_patch | elem_count_histogram_last_element
> ----------+------------+-----------------------------------
> 1030 | 412 | 4.7843137
> (1 row)
>
> without patch:
> explain select unnest(i) from array_op_test;
> QUERY PLAN
> ----------------------------------------------------------------------
> ProjectSet (cost=0.00..9.95 rows=1030 width=4)
> -> Seq Scan on array_op_test (cost=0.00..4.03 rows=103 width=40)
> (2 rows)
>
> with patch:
> explain select unnest(i) from array_op_test;
> QUERY PLAN
> ----------------------------------------------------------------------
> ProjectSet (cost=0.00..6.86 rows=412 width=4)
> -> Seq Scan on array_op_test (cost=0.00..4.03 rows=103 width=40)
> (2 rows)
> --------
Hi.
I did a minor change. change estimate_array_length return type to
double, cost_tidscan function inside `int ntuples` to `double
ntuples`.
`clamp_row_est(get_function_rows(root, expr->funcid, clause));` will
round 4.7843137 to 5.
so with your patch and my refactor, the rows will be 103 * 5 = 515.
explain select unnest(i) from array_op_test;
QUERY PLAN
----------------------------------------------------------------------
ProjectSet (cost=0.00..7.38 rows=515 width=4)
-> Seq Scan on array_op_test (cost=0.00..4.03 rows=103 width=40)
(2 rows)
From 448e0149d0dd1cfc0ab629a6fc164c6071169926 Mon Sep 17 00:00:00 2001
From: pgaddict <[email protected]>
Date: Thu, 30 Nov 2023 12:15:01 +0800
Subject: [PATCH v1 1/1] minor refactor.
make estimate_array_length return double instead of int.
similarily change estimate_array_length's caller fucntion cost_tidscan's variable ntuple
from type int to double.
because in estimate_array_length, we may need "average count of distinct element values"
info, and that is a float4 data type.
---
src/backend/optimizer/path/costsize.c | 6 +++---
src/backend/utils/adt/selfuncs.c | 18 +++++++++---------
src/include/utils/selfuncs.h | 2 +-
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 8bcdcc7f..65854fbe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1227,7 +1227,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
QualCost qpqual_cost;
Cost cpu_per_tuple;
QualCost tid_qual_cost;
- int ntuples;
+ double ntuples;
ListCell *l;
double spc_random_page_cost;
@@ -1242,7 +1242,7 @@ cost_tidscan(Path *path, PlannerInfo *root,
path->rows = baserel->rows;
/* Count how many tuples we expect to retrieve */
- ntuples = 0;
+ ntuples = 0.0;
foreach(l, tidquals)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
@@ -4741,7 +4741,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
Node *arraynode = (Node *) lsecond(saop->args);
QualCost sacosts;
QualCost hcosts;
- int estarraylen = estimate_array_length(context->root, arraynode);
+ double estarraylen = estimate_array_length(context->root, arraynode);
set_sa_opfuncid(saop);
sacosts.startup = sacosts.per_tuple = 0;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6346b41d..f6519c7f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2130,7 +2130,7 @@ scalararraysel(PlannerInfo *root,
*
* It's important that this agree with scalararraysel.
*/
-int
+double
estimate_array_length(PlannerInfo *root, Node *arrayexpr)
{
/* look through any binary-compatible relabeling of arrayexpr */
@@ -2145,12 +2145,12 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
if (arrayisnull)
return 0;
arrayval = DatumGetArrayTypeP(arraydatum);
- return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
+ return (double) ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
}
else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
!((ArrayExpr *) arrayexpr)->multidims)
{
- return list_length(((ArrayExpr *) arrayexpr)->elements);
+ return (double) list_length(((ArrayExpr *) arrayexpr)->elements);
}
else if (arrayexpr && IsA(arrayexpr, Var))
{
@@ -2162,7 +2162,7 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
*/
VariableStatData vardata;
AttStatsSlot sslot;
- int nelem = -1;
+ double nelem = -1;
examine_variable(root, arrayexpr, 0, &vardata);
@@ -2170,18 +2170,18 @@ estimate_array_length(PlannerInfo *root, Node *arrayexpr)
{
if (get_attstatsslot(&sslot, vardata.statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, ATTSTATSSLOT_NUMBERS))
{
- nelem = sslot.numbers[sslot.nnumbers - 1];
+ nelem = (double) sslot.numbers[sslot.nnumbers - 1];
free_attstatsslot(&sslot);
}
}
ReleaseVariableStats(vardata);
if (nelem >= 0)
- return nelem;
+ return (double) nelem;
}
/* default guess --- see also scalararraysel */
- return 10;
+ return 10.0;
}
/*
@@ -6565,7 +6565,7 @@ genericcostestimate(PlannerInfo *root,
if (IsA(rinfo->clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
- int alength = estimate_array_length(root, lsecond(saop->args));
+ double alength = estimate_array_length(root, lsecond(saop->args));
if (alength > 1)
num_sa_scans *= alength;
@@ -6845,7 +6845,7 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
Node *other_operand = (Node *) lsecond(saop->args);
- int alength = estimate_array_length(root, other_operand);
+ double alength = estimate_array_length(root, other_operand);
clause_op = saop->opno;
found_saop = true;
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index bf967815..02a6224b 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -200,7 +200,7 @@ extern Selectivity scalararraysel(PlannerInfo *root,
ScalarArrayOpExpr *clause,
bool is_join_clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
-extern int estimate_array_length(PlannerInfo *root, Node *arrayexpr);
+extern double estimate_array_length(PlannerInfo *root, Node *arrayexpr);
extern Selectivity rowcomparesel(PlannerInfo *root,
RowCompareExpr *clause,
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
--
2.34.1