Well, I was regretting missing the deadline for this CommitFest and
then realized today was only the 14th, so I finished this up while the
kids were napping.
I ended up not reusing the reloptions.c code. It looks like a lot of
extra complexity for no obvious benefit, considering that there is no
equivalent of AMs for tablespaces and therefore no need to support
AM-specific options. I did reuse the reloptions syntax, and I think
the internal representation could always be redone later, if we find
that there's a use case for something more complicated.
...Robert
*** a/doc/src/sgml/config.sgml
--- b/doc/src/sgml/config.sgml
***************
*** 1935,1940 **** archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
--- 1935,1943 ----
<para>
Sets the planner's estimate of the cost of a disk page fetch
that is part of a series of sequential fetches. The default is 1.0.
+ This value can be overridden for a particular tablespace by setting
+ the tablespace parameter of the same name
+ (see <xref linkend="sql-altertablespace">).
</para>
</listitem>
</varlistentry>
***************
*** 1948,1953 **** archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
--- 1951,1962 ----
<para>
Sets the planner's estimate of the cost of a
non-sequentially-fetched disk page. The default is 4.0.
+ This value can be overridden for a particular tablespace by setting
+ the tablespace parameter of the same name
+ (see <xref linkend="sql-altertablespace">).
+ </para>
+
+ <para>
Reducing this value relative to <varname>seq_page_cost</>
will cause the system to prefer index scans; raising it will
make index scans look relatively more expensive. You can raise
*** a/doc/src/sgml/ref/alter_tablespace.sgml
--- b/doc/src/sgml/ref/alter_tablespace.sgml
***************
*** 23,28 **** PostgreSQL documentation
--- 23,30 ----
<synopsis>
ALTER TABLESPACE <replaceable>name</replaceable> RENAME TO <replaceable>new_name</replaceable>
ALTER TABLESPACE <replaceable>name</replaceable> OWNER TO <replaceable>new_owner</replaceable>
+ ALTER TABLESPACE <replaceable>name</replaceable> SET ( <replaceable class="PARAMETER">tablespace_option</replaceable> = <replaceable class="PARAMETER">value</replaceable> [, ... ] )
+ ALTER TABLESPACE <replaceable>name</replaceable> RESET ( <replaceable class="PARAMETER">tablespace_option</replaceable> [, ... ] )
</synopsis>
</refsynopsisdiv>
***************
*** 74,79 **** ALTER TABLESPACE <replaceable>name</replaceable> OWNER TO <replaceable>new_owner
--- 76,99 ----
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="parameter">tablespace_option</replaceable></term>
+ <listitem>
+ <para>
+ A tablespace parameter to be set or reset. Currently, the only
+ available parameters are <varname>seq_page_cost</> and
+ <varname>random_page_cost</>. Setting either value for a particular
+ tablespace will override the planner's usual estimate of the cost of
+ reading pages from tables in that tablespace, as established by
+ the configuration parameters of the same name (see
+ <xref linkend="guc-seq-page-cost">,
+ <xref linkend="guc-random-page-cost">). This may be useful if one
+ tablespace is located on a disk which is faster or slower than the
+ remainder of the I/O subsystem.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
*** a/src/backend/catalog/aclchk.c
--- b/src/backend/catalog/aclchk.c
***************
*** 2621,2638 **** ExecGrant_Tablespace(InternalGrant *istmt)
int nnewmembers;
Oid *oldmembers;
Oid *newmembers;
- ScanKeyData entry[1];
- SysScanDesc scan;
HeapTuple tuple;
! /* There's no syscache for pg_tablespace, so must look the hard way */
! ScanKeyInit(&entry[0],
! ObjectIdAttributeNumber,
! BTEqualStrategyNumber, F_OIDEQ,
! ObjectIdGetDatum(tblId));
! scan = systable_beginscan(relation, TablespaceOidIndexId, true,
! SnapshotNow, 1, entry);
! tuple = systable_getnext(scan);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for tablespace %u", tblId);
--- 2621,2631 ----
int nnewmembers;
Oid *oldmembers;
Oid *newmembers;
HeapTuple tuple;
! /* Search syscache for pg_tablespace */
! tuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(tblId),
! 0, 0, 0);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for tablespace %u", tblId);
***************
*** 2703,2709 **** ExecGrant_Tablespace(InternalGrant *istmt)
noldmembers, oldmembers,
nnewmembers, newmembers);
! systable_endscan(scan);
pfree(new_acl);
--- 2696,2702 ----
noldmembers, oldmembers,
nnewmembers, newmembers);
! ReleaseSysCache(tuple);
pfree(new_acl);
***************
*** 3443,3451 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid,
AclMode mask, AclMaskHow how)
{
AclMode result;
- Relation pg_tablespace;
- ScanKeyData entry[1];
- SysScanDesc scan;
HeapTuple tuple;
Datum aclDatum;
bool isNull;
--- 3436,3441 ----
***************
*** 3458,3474 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid,
/*
* Get the tablespace's ACL from pg_tablespace
! *
! * There's no syscache for pg_tablespace, so must look the hard way
! */
! pg_tablespace = heap_open(TableSpaceRelationId, AccessShareLock);
! ScanKeyInit(&entry[0],
! ObjectIdAttributeNumber,
! BTEqualStrategyNumber, F_OIDEQ,
! ObjectIdGetDatum(spc_oid));
! scan = systable_beginscan(pg_tablespace, TablespaceOidIndexId, true,
! SnapshotNow, 1, entry);
! tuple = systable_getnext(scan);
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
--- 3448,3456 ----
/*
* Get the tablespace's ACL from pg_tablespace
! */
! tuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(spc_oid),
! 0, 0, 0);
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
***************
*** 3476,3483 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid,
ownerId = ((Form_pg_tablespace) GETSTRUCT(tuple))->spcowner;
! aclDatum = heap_getattr(tuple, Anum_pg_tablespace_spcacl,
! RelationGetDescr(pg_tablespace), &isNull);
if (isNull)
{
--- 3458,3466 ----
ownerId = ((Form_pg_tablespace) GETSTRUCT(tuple))->spcowner;
! aclDatum = SysCacheGetAttr(TABLESPACEOID, tuple,
! Anum_pg_tablespace_spcacl,
! &isNull);
if (isNull)
{
***************
*** 3497,3504 **** pg_tablespace_aclmask(Oid spc_oid, Oid roleid,
if (acl && (Pointer) acl != DatumGetPointer(aclDatum))
pfree(acl);
! systable_endscan(scan);
! heap_close(pg_tablespace, AccessShareLock);
return result;
}
--- 3480,3486 ----
if (acl && (Pointer) acl != DatumGetPointer(aclDatum))
pfree(acl);
! ReleaseSysCache(tuple);
return result;
}
***************
*** 4025,4033 **** pg_namespace_ownercheck(Oid nsp_oid, Oid roleid)
bool
pg_tablespace_ownercheck(Oid spc_oid, Oid roleid)
{
- Relation pg_tablespace;
- ScanKeyData entry[1];
- SysScanDesc scan;
HeapTuple spctuple;
Oid spcowner;
--- 4007,4012 ----
***************
*** 4035,4051 **** pg_tablespace_ownercheck(Oid spc_oid, Oid roleid)
if (superuser_arg(roleid))
return true;
! /* There's no syscache for pg_tablespace, so must look the hard way */
! pg_tablespace = heap_open(TableSpaceRelationId, AccessShareLock);
! ScanKeyInit(&entry[0],
! ObjectIdAttributeNumber,
! BTEqualStrategyNumber, F_OIDEQ,
! ObjectIdGetDatum(spc_oid));
! scan = systable_beginscan(pg_tablespace, TablespaceOidIndexId, true,
! SnapshotNow, 1, entry);
!
! spctuple = systable_getnext(scan);
!
if (!HeapTupleIsValid(spctuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
--- 4014,4022 ----
if (superuser_arg(roleid))
return true;
! /* Search syscache for pg_tablespace */
! spctuple = SearchSysCache(TABLESPACEOID, ObjectIdGetDatum(spc_oid),
! 0, 0, 0);
if (!HeapTupleIsValid(spctuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
***************
*** 4053,4060 **** pg_tablespace_ownercheck(Oid spc_oid, Oid roleid)
spcowner = ((Form_pg_tablespace) GETSTRUCT(spctuple))->spcowner;
! systable_endscan(scan);
! heap_close(pg_tablespace, AccessShareLock);
return has_privs_of_role(roleid, spcowner);
}
--- 4024,4030 ----
spcowner = ((Form_pg_tablespace) GETSTRUCT(spctuple))->spcowner;
! ReleaseSysCache(spctuple);
return has_privs_of_role(roleid, spcowner);
}
*** a/src/backend/commands/tablespace.c
--- b/src/backend/commands/tablespace.c
***************
*** 56,61 ****
--- 56,62 ----
#include "catalog/indexing.h"
#include "catalog/pg_tablespace.h"
#include "commands/comment.h"
+ #include "commands/defrem.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "postmaster/bgwriter.h"
***************
*** 77,82 **** char *temp_tablespaces = NULL;
--- 78,84 ----
static bool remove_tablespace_directories(Oid tablespaceoid, bool redo);
static void set_short_version(const char *path);
+ static double interpret_page_cost(DefElem *opt);
/*
***************
*** 284,289 **** CreateTableSpace(CreateTableSpaceStmt *stmt)
--- 286,295 ----
DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
values[Anum_pg_tablespace_spcowner - 1] =
ObjectIdGetDatum(ownerId);
+ values[Anum_pg_tablespace_spcseq_page_cost - 1] =
+ Float8GetDatum(-1.0);
+ values[Anum_pg_tablespace_spcrandom_page_cost - 1] =
+ Float8GetDatum(-1.0);
values[Anum_pg_tablespace_spclocation - 1] =
CStringGetTextDatum(location);
nulls[Anum_pg_tablespace_spcacl - 1] = true;
***************
*** 910,915 **** AlterTableSpaceOwner(const char *name, Oid newOwnerId)
--- 916,1029 ----
/*
+ * AlterTableSpace: set or reset per-tablespace seq_page_cost/random_page_cost
+ */
+ void
+ AlterTableSpace(AlterTableSpaceStmt *stmt)
+ {
+ Relation rel;
+ ScanKeyData entry[1];
+ HeapScanDesc scandesc;
+ HeapTuple tup;
+ Datum repl_val[Natts_pg_tablespace];
+ bool repl_null[Natts_pg_tablespace];
+ bool repl_repl[Natts_pg_tablespace];
+ HeapTuple newtuple;
+ ListCell *lc;
+
+ /* Find the pg_tablespace row by name, using a heap scan keyed on spcname */
+ rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&entry[0],
+ Anum_pg_tablespace_spcname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(stmt->tablespacename));
+ scandesc = heap_beginscan(rel, SnapshotNow, 1, entry);
+ tup = heap_getnext(scandesc, ForwardScanDirection);
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablespace \"%s\" does not exist",
+ stmt->tablespacename)));
+
+ /* Must own the tablespace (the ownercheck also passes superusers) */
+ if (!pg_tablespace_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE,
+ stmt->tablespacename);
+
+ /* Start with no column replaced and no replacement value marked NULL. */
+ memset(repl_null, false, sizeof(repl_null));
+ memset(repl_repl, false, sizeof(repl_repl));
+
+ /* Turn each option into a replacement column value; reject unknown names. */
+ foreach(lc, stmt->options)
+ {
+ DefElem *opt = (DefElem *) lfirst(lc);
+
+ if (strcmp(opt->defname, "seq_page_cost") == 0)
+ {
+ double newval = interpret_page_cost(opt);
+ repl_repl[Anum_pg_tablespace_spcseq_page_cost - 1] = true;
+ repl_val[Anum_pg_tablespace_spcseq_page_cost - 1] =
+ Float8GetDatum(newval);
+ }
+ else if (strcmp(opt->defname, "random_page_cost") == 0)
+ {
+ double newval = interpret_page_cost(opt);
+ repl_repl[Anum_pg_tablespace_spcrandom_page_cost - 1] = true;
+ repl_val[Anum_pg_tablespace_spcrandom_page_cost - 1] =
+ Float8GetDatum(newval);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized parameter \"%s\"", opt->defname)));
+ }
+ }
+
+ /* Build the modified tuple, store it, and update the catalog's indexes. */
+ newtuple = heap_modify_tuple(tup, RelationGetDescr(rel), repl_val,
+ repl_null, repl_repl);
+ simple_heap_update(rel, &newtuple->t_self, newtuple);
+ CatalogUpdateIndexes(rel, newtuple);
+ heap_freetuple(newtuple);
+
+ /* End the scan and close pg_tablespace; NoLock is passed to heap_close. */
+ heap_endscan(scandesc);
+ heap_close(rel, NoLock);
+ }
+
+ /*
+ * Convert a page cost option into the value stored in pg_tablespace.
+ * RESET yields -1.0 (no override); SET must supply a finite value >= 0.
+ */
+ static double
+ interpret_page_cost(DefElem *opt)
+ {
+ 	double		newval;
+
+ 	if (opt->defaction == DEFELEM_DROP)
+ 	{
+ 		if (opt->arg != NULL)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_SYNTAX_ERROR),
+ 					 errmsg("RESET must not include values for parameters")));
+ 		return -1.0;
+ 	}
+
+ 	/* Written so that NaN and infinity fail the range check as well. */
+ 	newval = defGetNumeric(opt);
+ 	if (!(newval >= 0 && newval <= DBL_MAX))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("%g is outside the valid range for parameter \"%s\" (%g .. %g)",
+ 						newval, opt->defname, 0.0, DBL_MAX)));
+
+ 	return newval;
+ }
+
+ /*
* Routines for handling the GUC variable 'default_tablespace'.
*/
*** a/src/backend/nodes/copyfuncs.c
--- b/src/backend/nodes/copyfuncs.c
***************
*** 3058,3063 **** _copyDropTableSpaceStmt(DropTableSpaceStmt *from)
--- 3058,3074 ----
return newnode;
}
+ static AlterTableSpaceStmt *
+ _copyAlterTableSpaceStmt(AlterTableSpaceStmt *from)
+ {
+ AlterTableSpaceStmt *newnode = makeNode(AlterTableSpaceStmt);
+
+ COPY_STRING_FIELD(tablespacename);	/* name of the tablespace to alter */
+ COPY_NODE_FIELD(options);	/* SET/RESET option list (DefElem nodes) */
+
+ return newnode;
+ }
+
static CreateFdwStmt *
_copyCreateFdwStmt(CreateFdwStmt *from)
{
***************
*** 4021,4026 **** copyObject(void *from)
--- 4032,4040 ----
case T_DropTableSpaceStmt:
retval = _copyDropTableSpaceStmt(from);
break;
+ case T_AlterTableSpaceStmt:
+ retval = _copyAlterTableSpaceStmt(from);
+ break;
case T_CreateFdwStmt:
retval = _copyCreateFdwStmt(from);
break;
*** a/src/backend/nodes/equalfuncs.c
--- b/src/backend/nodes/equalfuncs.c
***************
*** 1569,1574 **** _equalDropTableSpaceStmt(DropTableSpaceStmt *a, DropTableSpaceStmt *b)
--- 1569,1583 ----
}
static bool
+ _equalAlterTableSpaceStmt(AlterTableSpaceStmt *a, AlterTableSpaceStmt *b)
+ {
+ COMPARE_STRING_FIELD(tablespacename);	/* name of the tablespace to alter */
+ COMPARE_NODE_FIELD(options);	/* SET/RESET option list (DefElem nodes) */
+
+ return true;
+ }
+
+ static bool
_equalCreateFdwStmt(CreateFdwStmt *a, CreateFdwStmt *b)
{
COMPARE_STRING_FIELD(fdwname);
***************
*** 2714,2719 **** equal(void *a, void *b)
--- 2723,2731 ----
case T_DropTableSpaceStmt:
retval = _equalDropTableSpaceStmt(a, b);
break;
+ case T_AlterTableSpaceStmt:
+ retval = _equalAlterTableSpaceStmt(a, b);
+ break;
case T_CreateFdwStmt:
retval = _equalCreateFdwStmt(a, b);
break;
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
***************
*** 1585,1590 **** _outRelOptInfo(StringInfo str, RelOptInfo *node)
--- 1585,1591 ----
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
WRITE_UINT_FIELD(relid);
+ WRITE_UINT_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
WRITE_INT_FIELD(min_attr);
WRITE_INT_FIELD(max_attr);
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
***************
*** 27,32 ****
--- 27,37 ----
* detail. Note that all of these parameters are user-settable, in case
* the default values are drastically off for a particular platform.
*
+ * seq_page_cost and random_page_cost can also be overridden for an individual
+ * tablespace, in case some data is on a fast disk and other data is on a slow
+ * disk. Per-tablespace overrides never apply to temporary work files such as
+ * an external sort or a materialize node that overflows work_mem.
+ *
* We compute two separate costs for each path:
* total_cost: total estimated cost to fetch all tuples
* startup_cost: cost that is expended before first tuple is fetched
***************
*** 164,169 **** void
--- 169,175 ----
cost_seqscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel)
{
+ double spc_seq_page_cost;
Cost startup_cost = 0;
Cost run_cost = 0;
Cost cpu_per_tuple;
***************
*** 175,184 **** cost_seqscan(Path *path, PlannerInfo *root,
if (!enable_seqscan)
startup_cost += disable_cost;
/*
* disk costs
*/
! run_cost += seq_page_cost * baserel->pages;
/* CPU costs */
startup_cost += baserel->baserestrictcost.startup;
--- 181,195 ----
if (!enable_seqscan)
startup_cost += disable_cost;
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ NULL,
+ &spc_seq_page_cost);
+
/*
* disk costs
*/
! run_cost += spc_seq_page_cost * baserel->pages;
/* CPU costs */
startup_cost += baserel->baserestrictcost.startup;
***************
*** 226,231 **** cost_index(IndexPath *path, PlannerInfo *root,
--- 237,244 ----
Selectivity indexSelectivity;
double indexCorrelation,
csquared;
+ double spc_seq_page_cost,
+ spc_random_page_cost;
Cost min_IO_cost,
max_IO_cost;
Cost cpu_per_tuple;
***************
*** 272,284 **** cost_index(IndexPath *path, PlannerInfo *root,
/* estimate number of main-table tuples fetched */
tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
/*----------
* Estimate number of main-table pages fetched, and compute I/O cost.
*
* When the index ordering is uncorrelated with the table ordering,
* we use an approximation proposed by Mackert and Lohman (see
* index_pages_fetched() for details) to compute the number of pages
! * fetched, and then charge random_page_cost per page fetched.
*
* When the index ordering is exactly correlated with the table ordering
* (just after a CLUSTER, for example), the number of pages fetched should
--- 285,302 ----
/* estimate number of main-table tuples fetched */
tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+ /* fetch estimated page costs for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
/*----------
* Estimate number of main-table pages fetched, and compute I/O cost.
*
* When the index ordering is uncorrelated with the table ordering,
* we use an approximation proposed by Mackert and Lohman (see
* index_pages_fetched() for details) to compute the number of pages
! * fetched, and then charge spc_random_page_cost per page fetched.
*
* When the index ordering is exactly correlated with the table ordering
* (just after a CLUSTER, for example), the number of pages fetched should
***************
*** 286,292 **** cost_index(IndexPath *path, PlannerInfo *root,
* will be sequential fetches, not the random fetches that occur in the
* uncorrelated case. So if the number of pages is more than 1, we
* ought to charge
! * random_page_cost + (pages_fetched - 1) * seq_page_cost
* For partially-correlated indexes, we ought to charge somewhere between
* these two estimates. We currently interpolate linearly between the
* estimates based on the correlation squared (XXX is that appropriate?).
--- 304,310 ----
* will be sequential fetches, not the random fetches that occur in the
* uncorrelated case. So if the number of pages is more than 1, we
* ought to charge
! * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
* For partially-correlated indexes, we ought to charge somewhere between
* these two estimates. We currently interpolate linearly between the
* estimates based on the correlation squared (XXX is that appropriate?).
***************
*** 309,315 **** cost_index(IndexPath *path, PlannerInfo *root,
(double) index->pages,
root);
! max_IO_cost = (pages_fetched * random_page_cost) / num_scans;
/*
* In the perfectly correlated case, the number of pages touched by
--- 327,333 ----
(double) index->pages,
root);
! max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
/*
* In the perfectly correlated case, the number of pages touched by
***************
*** 328,334 **** cost_index(IndexPath *path, PlannerInfo *root,
(double) index->pages,
root);
! min_IO_cost = (pages_fetched * random_page_cost) / num_scans;
}
else
{
--- 346,352 ----
(double) index->pages,
root);
! min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
}
else
{
***************
*** 342,354 **** cost_index(IndexPath *path, PlannerInfo *root,
root);
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
! max_IO_cost = pages_fetched * random_page_cost;
/* min_IO_cost is for the perfectly correlated case (csquared=1) */
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
! min_IO_cost = random_page_cost;
if (pages_fetched > 1)
! min_IO_cost += (pages_fetched - 1) * seq_page_cost;
}
/*
--- 360,372 ----
root);
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
! max_IO_cost = pages_fetched * spc_random_page_cost;
/* min_IO_cost is for the perfectly correlated case (csquared=1) */
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
! min_IO_cost = spc_random_page_cost;
if (pages_fetched > 1)
! min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
}
/*
***************
*** 553,558 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
--- 571,578 ----
Cost cost_per_page;
double tuples_fetched;
double pages_fetched;
+ double spc_seq_page_cost,
+ spc_random_page_cost;
double T;
/* Should only be applied to base relations */
***************
*** 571,576 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
--- 591,601 ----
startup_cost += indexTotalCost;
+ /* Fetch estimated page costs for tablespace containing table. */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
/*
* Estimate number of main-table pages fetched.
*/
***************
*** 609,625 **** cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
pages_fetched = ceil(pages_fetched);
/*
! * For small numbers of pages we should charge random_page_cost apiece,
* while if nearly all the table's pages are being read, it's more
! * appropriate to charge seq_page_cost apiece. The effect is nonlinear,
* too. For lack of a better idea, interpolate like this to determine the
* cost per page.
*/
if (pages_fetched >= 2.0)
! cost_per_page = random_page_cost -
! (random_page_cost - seq_page_cost) * sqrt(pages_fetched / T);
else
! cost_per_page = random_page_cost;
run_cost += pages_fetched * cost_per_page;
--- 634,651 ----
pages_fetched = ceil(pages_fetched);
/*
! * For small numbers of pages we should charge spc_random_page_cost apiece,
* while if nearly all the table's pages are being read, it's more
! * appropriate to charge spc_seq_page_cost apiece. The effect is nonlinear,
* too. For lack of a better idea, interpolate like this to determine the
* cost per page.
*/
if (pages_fetched >= 2.0)
! cost_per_page = spc_random_page_cost -
! (spc_random_page_cost - spc_seq_page_cost)
! * sqrt(pages_fetched / T);
else
! cost_per_page = spc_random_page_cost;
run_cost += pages_fetched * cost_per_page;
***************
*** 783,788 **** cost_tidscan(Path *path, PlannerInfo *root,
--- 809,815 ----
QualCost tid_qual_cost;
int ntuples;
ListCell *l;
+ double spc_random_page_cost;
/* Should only be applied to base relations */
Assert(baserel->relid > 0);
***************
*** 835,842 **** cost_tidscan(Path *path, PlannerInfo *root,
*/
cost_qual_eval(&tid_qual_cost, tidquals, root);
/* disk costs --- assume each tuple on a different page */
! run_cost += random_page_cost * ntuples;
/* CPU costs */
startup_cost += baserel->baserestrictcost.startup +
--- 862,874 ----
*/
cost_qual_eval(&tid_qual_cost, tidquals, root);
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ NULL);
+
/* disk costs --- assume each tuple on a different page */
! run_cost += spc_random_page_cost * ntuples;
/* CPU costs */
startup_cost += baserel->baserestrictcost.startup +
*** a/src/backend/optimizer/util/plancat.c
--- b/src/backend/optimizer/util/plancat.c
***************
*** 91,96 **** get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
--- 91,97 ----
rel->min_attr = FirstLowInvalidHeapAttributeNumber + 1;
rel->max_attr = RelationGetNumberOfAttributes(relation);
+ rel->reltablespace = RelationGetForm(relation)->reltablespace;
Assert(rel->max_attr >= rel->min_attr);
rel->attr_needed = (Relids *)
***************
*** 183,188 **** get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
--- 184,191 ----
info = makeNode(IndexOptInfo);
info->indexoid = index->indexrelid;
+ info->reltablespace =
+ RelationGetForm(indexRelation)->reltablespace;
info->rel = rel;
info->ncolumns = ncolumns = index->indnatts;
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
***************
*** 5622,5627 **** RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name
--- 5622,5647 ----
n->newname = $6;
$$ = (Node *)n;
}
+ | ALTER TABLESPACE name SET reloptions
+ {
+ AlterTableSpaceStmt *n = makeNode(AlterTableSpaceStmt);
+ n->tablespacename = $3;
+ n->options = $5;
+ $$ = (Node *)n;
+ }
+ | ALTER TABLESPACE name RESET reloptions
+ {
+ AlterTableSpaceStmt *n = makeNode(AlterTableSpaceStmt);
+ ListCell *lc;
+ n->tablespacename = $3;
+ n->options = $5;
+ foreach (lc, n->options)
+ {
+ DefElem *def = lfirst(lc);
+ def->defaction = DEFELEM_DROP;
+ }
+ $$ = (Node *)n;
+ }
| ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
*** a/src/backend/tcop/utility.c
--- b/src/backend/tcop/utility.c
***************
*** 214,219 **** check_xact_readonly(Node *parsetree)
--- 214,220 ----
case T_CreateUserMappingStmt:
case T_AlterUserMappingStmt:
case T_DropUserMappingStmt:
+ case T_AlterTableSpaceStmt:
ereport(ERROR,
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
errmsg("transaction is read-only")));
***************
*** 480,485 **** ProcessUtility(Node *parsetree,
--- 481,490 ----
DropTableSpace((DropTableSpaceStmt *) parsetree);
break;
+ case T_AlterTableSpaceStmt:
+ AlterTableSpace((AlterTableSpaceStmt *) parsetree);
+ break;
+
case T_CreateFdwStmt:
CreateForeignDataWrapper((CreateFdwStmt *) parsetree);
break;
***************
*** 1386,1391 **** CreateCommandTag(Node *parsetree)
--- 1391,1400 ----
tag = "DROP TABLESPACE";
break;
+ case T_AlterTableSpaceStmt:
+ tag = "ALTER TABLESPACE";
+ break;
+
case T_CreateFdwStmt:
tag = "CREATE FOREIGN DATA WRAPPER";
break;
***************
*** 2165,2170 **** GetCommandLogLevel(Node *parsetree)
--- 2174,2183 ----
lev = LOGSTMT_DDL;
break;
+ case T_AlterTableSpaceStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
case T_CreateFdwStmt:
case T_AlterFdwStmt:
case T_DropFdwStmt:
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 5372,5377 **** genericcostestimate(PlannerInfo *root,
--- 5372,5378 ----
QualCost index_qual_cost;
double qual_op_cost;
double qual_arg_cost;
+ double spc_random_page_cost;
List *selectivityQuals;
ListCell *l;
***************
*** 5480,5485 **** genericcostestimate(PlannerInfo *root,
--- 5481,5491 ----
else
numIndexPages = 1.0;
+ /* fetch estimated page cost for tablespace containing index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ NULL);
+
/*
* Now compute the disk access costs.
*
***************
*** 5526,5540 **** genericcostestimate(PlannerInfo *root,
* share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
* since that's internal to the indexscan.)
*/
! *indexTotalCost = (pages_fetched * random_page_cost) / num_outer_scans;
}
else
{
/*
! * For a single index scan, we just charge random_page_cost per page
! * touched.
*/
! *indexTotalCost = numIndexPages * random_page_cost;
}
/*
--- 5532,5547 ----
* share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
* since that's internal to the indexscan.)
*/
! *indexTotalCost = (pages_fetched * spc_random_page_cost)
! / num_outer_scans;
}
else
{
/*
! * For a single index scan, we just charge spc_random_page_cost per
! * page touched.
*/
! *indexTotalCost = numIndexPages * spc_random_page_cost;
}
/*
***************
*** 5549,5559 **** genericcostestimate(PlannerInfo *root,
*
* We can deal with this by adding a very small "fudge factor" that
* depends on the index size. The fudge factor used here is one
! * random_page_cost per 100000 index pages, which should be small enough
! * to not alter index-vs-seqscan decisions, but will prevent indexes of
! * different sizes from looking exactly equally attractive.
*/
! *indexTotalCost += index->pages * random_page_cost / 100000.0;
/*
* CPU cost: any complex expressions in the indexquals will need to be
--- 5556,5566 ----
*
* We can deal with this by adding a very small "fudge factor" that
* depends on the index size. The fudge factor used here is one
! * spc_random_page_cost per 100000 index pages, which should be small
! * enough to not alter index-vs-seqscan decisions, but will prevent
! * indexes of different sizes from looking exactly equally attractive.
*/
! *indexTotalCost += index->pages * spc_random_page_cost / 100000.0;
/*
* CPU cost: any complex expressions in the indexquals will need to be
*** a/src/backend/utils/cache/lsyscache.c
--- b/src/backend/utils/cache/lsyscache.c
***************
*** 26,34 ****
--- 26,36 ----
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic.h"
+ #include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
+ #include "optimizer/cost.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
***************
*** 2776,2778 **** get_roleid_checked(const char *rolname)
--- 2778,2819 ----
errmsg("role \"%s\" does not exist", rolname)));
return roleid;
}
+
+ /* ---------- PG_TABLESPACE CACHE ---------- */
+
+ /*
+ * get_tablespace_page_costs
+ *		Report the random and sequential page costs to use for a tablespace.
+ *		A per-tablespace setting, if any, takes precedence over the GUC.
+ *
+ * Either output pointer may be NULL when the caller does not want that cost.
+ */
+ void
+ get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost,
+ 						  double *spc_seq_page_cost)
+ {
+ 	Form_pg_tablespace spcform;
+ 	HeapTuple	spctuple;
+
+ 	/* Begin with the GUC values; they remain in effect unless overridden. */
+ 	if (spc_random_page_cost != NULL)
+ 		*spc_random_page_cost = random_page_cost;
+ 	if (spc_seq_page_cost != NULL)
+ 		*spc_seq_page_cost = seq_page_cost;
+
+ 	/* InvalidOid in pg_class means the database's default tablespace. */
+ 	spctuple = SearchSysCache(TABLESPACEOID,
+ 		ObjectIdGetDatum(spcid == InvalidOid ? MyDatabaseTableSpace : spcid),
+ 		0, 0, 0);
+ 	if (!HeapTupleIsValid(spctuple))
+ 		return;					/* no such tablespace: keep the GUC values */
+
+ 	/* A negative stored cost means no override has been set. */
+ 	spcform = (Form_pg_tablespace) GETSTRUCT(spctuple);
+ 	if (spc_random_page_cost != NULL && spcform->spcrandom_page_cost >= 0)
+ 		*spc_random_page_cost = (double) spcform->spcrandom_page_cost;
+ 	if (spc_seq_page_cost != NULL && spcform->spcseq_page_cost >= 0)
+ 		*spc_seq_page_cost = (double) spcform->spcseq_page_cost;
+
+ 	ReleaseSysCache(spctuple);
+ }
*** a/src/backend/utils/cache/syscache.c
--- b/src/backend/utils/cache/syscache.c
***************
*** 43,48 ****
--- 43,49 ----
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_statistic.h"
+ #include "catalog/pg_tablespace.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_config_map.h"
#include "catalog/pg_ts_dict.h"
***************
*** 609,614 **** static const struct cachedesc cacheinfo[] = {
--- 610,627 ----
},
1024
},
+ {TableSpaceRelationId, /* TABLESPACEOID */
+ TablespaceOidIndexId,
+ 0,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0,
+ },
+ 16
+ },
{TSConfigMapRelationId, /* TSCONFIGMAP */
TSConfigMapIndexId,
0,
*** a/src/bin/pg_dump/pg_dumpall.c
--- b/src/bin/pg_dump/pg_dumpall.c
***************
*** 956,974 **** dumpTablespaces(PGconn *conn)
* Get all tablespaces except built-in ones (which we assume are named
* pg_xxx)
*/
! if (server_version >= 80200)
res = executeQuery(conn, "SELECT spcname, "
"pg_catalog.pg_get_userbyid(spcowner) AS spcowner, "
"spclocation, spcacl, "
"pg_catalog.shobj_description(oid, 'pg_tablespace') "
"FROM pg_catalog.pg_tablespace "
"WHERE spcname !~ '^pg_' "
"ORDER BY 1");
else
res = executeQuery(conn, "SELECT spcname, "
"pg_catalog.pg_get_userbyid(spcowner) AS spcowner, "
"spclocation, spcacl, "
! "null "
"FROM pg_catalog.pg_tablespace "
"WHERE spcname !~ '^pg_' "
"ORDER BY 1");
--- 956,988 ----
* Get all tablespaces except built-in ones (which we assume are named
* pg_xxx)
*/
! if (server_version >= 80500)
res = executeQuery(conn, "SELECT spcname, "
"pg_catalog.pg_get_userbyid(spcowner) AS spcowner, "
"spclocation, spcacl, "
+ "array_to_string(ARRAY["
+ "CASE WHEN spcseq_page_cost < 0 THEN NULL "
+ "ELSE 'seq_page_cost = ' || spcseq_page_cost END,"
+ "CASE WHEN spcrandom_page_cost < 0 THEN NULL "
+ "ELSE 'random_page_cost = ' || spcrandom_page_cost END"
+ "], ', '),"
"pg_catalog.shobj_description(oid, 'pg_tablespace') "
"FROM pg_catalog.pg_tablespace "
"WHERE spcname !~ '^pg_' "
"ORDER BY 1");
+ else if (server_version >= 80200)	/* >= 8.2 but < 8.5: comments, no options */
+ 	res = executeQuery(conn, "SELECT spcname, "
+ 					   "pg_catalog.pg_get_userbyid(spcowner) AS spcowner, "
+ 					   "spclocation, spcacl, null, "
+ 					   "pg_catalog.shobj_description(oid, 'pg_tablespace') "
+ 					   "FROM pg_catalog.pg_tablespace "
+ 					   "WHERE spcname !~ '^pg_' "
+ 					   "ORDER BY 1");
else
res = executeQuery(conn, "SELECT spcname, "
"pg_catalog.pg_get_userbyid(spcowner) AS spcowner, "
"spclocation, spcacl, "
! "null, null "
"FROM pg_catalog.pg_tablespace "
"WHERE spcname !~ '^pg_' "
"ORDER BY 1");
***************
*** 983,989 **** dumpTablespaces(PGconn *conn)
char *spcowner = PQgetvalue(res, i, 1);
char *spclocation = PQgetvalue(res, i, 2);
char *spcacl = PQgetvalue(res, i, 3);
! char *spccomment = PQgetvalue(res, i, 4);
char *fspcname;
/* needed for buildACLCommands() */
--- 997,1004 ----
char *spcowner = PQgetvalue(res, i, 1);
char *spclocation = PQgetvalue(res, i, 2);
char *spcacl = PQgetvalue(res, i, 3);
! char *spcoptions = PQgetvalue(res, i, 4);
! char *spccomment = PQgetvalue(res, i, 5);
char *fspcname;
/* needed for buildACLCommands() */
***************
*** 996,1001 **** dumpTablespaces(PGconn *conn)
--- 1011,1020 ----
appendStringLiteralConn(buf, spclocation, conn);
appendPQExpBuffer(buf, ";\n");
+ if (spcoptions && spcoptions[0] != '\0')
+ appendPQExpBuffer(buf, "ALTER TABLESPACE %s SET (%s);\n",
+ fspcname, spcoptions);
+
if (!skip_acls &&
!buildACLCommands(fspcname, NULL, "TABLESPACE", spcacl, spcowner,
"", server_version, buf))
*** a/src/include/catalog/pg_tablespace.h
--- b/src/include/catalog/pg_tablespace.h
***************
*** 32,37 **** CATALOG(pg_tablespace,1213) BKI_SHARED_RELATION
--- 32,39 ----
{
NameData spcname; /* tablespace name */
Oid spcowner; /* owner of tablespace */
+ float8 spcrandom_page_cost; /* per-tablespace random_page_cost */
+ float8 spcseq_page_cost; /* per-tablespace seq_page_cost */
text spclocation; /* physical location (VAR LENGTH) */
aclitem spcacl[1]; /* access permissions (VAR LENGTH) */
} FormData_pg_tablespace;
***************
*** 48,61 **** typedef FormData_pg_tablespace *Form_pg_tablespace;
* ----------------
*/
! #define Natts_pg_tablespace 4
! #define Anum_pg_tablespace_spcname 1
! #define Anum_pg_tablespace_spcowner 2
! #define Anum_pg_tablespace_spclocation 3
! #define Anum_pg_tablespace_spcacl 4
! DATA(insert OID = 1663 ( pg_default PGUID "" _null_ ));
! DATA(insert OID = 1664 ( pg_global PGUID "" _null_ ));
#define DEFAULTTABLESPACE_OID 1663
#define GLOBALTABLESPACE_OID 1664
--- 50,65 ----
* ----------------
*/
! #define Natts_pg_tablespace 6
! #define Anum_pg_tablespace_spcname 1
! #define Anum_pg_tablespace_spcowner 2
! #define Anum_pg_tablespace_spcrandom_page_cost 3
! #define Anum_pg_tablespace_spcseq_page_cost 4
! #define Anum_pg_tablespace_spclocation 5
! #define Anum_pg_tablespace_spcacl 6
! DATA(insert OID = 1663 ( pg_default PGUID -1 -1 "" _null_ ));
! DATA(insert OID = 1664 ( pg_global PGUID -1 -1 "" _null_ ));
#define DEFAULTTABLESPACE_OID 1663
#define GLOBALTABLESPACE_OID 1664
*** a/src/include/commands/tablespace.h
--- b/src/include/commands/tablespace.h
***************
*** 35,40 **** typedef struct xl_tblspc_drop_rec
--- 35,41 ----
extern void CreateTableSpace(CreateTableSpaceStmt *stmt);
extern void DropTableSpace(DropTableSpaceStmt *stmt);
+ extern void AlterTableSpace(AlterTableSpaceStmt *stmt);
extern void RenameTableSpace(const char *oldname, const char *newname);
extern void AlterTableSpaceOwner(const char *name, Oid newOwnerId);
*** a/src/include/nodes/nodes.h
--- b/src/include/nodes/nodes.h
***************
*** 346,351 **** typedef enum NodeTag
--- 346,352 ----
T_CreateUserMappingStmt,
T_AlterUserMappingStmt,
T_DropUserMappingStmt,
+ T_AlterTableSpaceStmt,
/*
* TAGS FOR PARSE TREE NODES (parsenodes.h)
*** a/src/include/nodes/parsenodes.h
--- b/src/include/nodes/parsenodes.h
***************
*** 1464,1469 **** typedef struct DropTableSpaceStmt
--- 1464,1476 ----
bool missing_ok; /* skip error if missing? */
} DropTableSpaceStmt;
+ typedef struct AlterTableSpaceStmt
+ {
+ NodeTag type;
+ char *tablespacename;
+ List *options;
+ } AlterTableSpaceStmt;
+
/* ----------------------
* Create/Drop FOREIGN DATA WRAPPER Statements
* ----------------------
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
***************
*** 361,366 **** typedef struct RelOptInfo
--- 361,367 ----
/* information about a base rel (not set for join rels!) */
Index relid;
+ Oid reltablespace; /* containing tablespace */
RTEKind rtekind; /* RELATION, SUBQUERY, or FUNCTION */
AttrNumber min_attr; /* smallest attrno of rel (often <0) */
AttrNumber max_attr; /* largest attrno of rel */
***************
*** 425,430 **** typedef struct IndexOptInfo
--- 426,432 ----
NodeTag type;
Oid indexoid; /* OID of the index relation */
+ Oid reltablespace; /* tablespace of index (not table) */
RelOptInfo *rel; /* back-link to index's table */
/* statistics from pg_class */
*** a/src/include/utils/lsyscache.h
--- b/src/include/utils/lsyscache.h
***************
*** 137,142 **** extern void free_attstatsslot(Oid atttype,
--- 137,144 ----
extern char *get_namespace_name(Oid nspid);
extern Oid get_roleid(const char *rolname);
extern Oid get_roleid_checked(const char *rolname);
+ extern void get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost,
+ float8 *spc_seq_page_cost);
#define type_is_array(typid) (get_element_type(typid) != InvalidOid)
*** a/src/include/utils/syscache.h
--- b/src/include/utils/syscache.h
***************
*** 71,76 **** enum SysCacheIdentifier
--- 71,77 ----
RELOID,
RULERELNAME,
STATRELATT,
+ TABLESPACEOID,
TSCONFIGMAP,
TSCONFIGNAMENSP,
TSCONFIGOID,
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers