On 08/04/2019 20:37, Andres Freund wrote:
> Hi,
>
> On 2019-04-08 15:34:46 +0300, Heikki Linnakangas wrote:
>> There were a bunch of typos in the comments in tableam.h, see attached. Some
>> of the comments could use more copy-editing and clarification, I think, but
>> I stuck to fixing just typos and such for now.
>
> I pushed these after adding three boring changes by pgindent. Thanks for
> those!
>
> I'd greatly welcome more feedback on the comments - I've been pretty
> deep in this for so long that I don't see all of the issues anymore. And
> a mild dyslexia doesn't help...
Here is another iteration on the comments. The patch is a mix of
copy-editing and questions. The questions are marked with "HEIKKI:". I
can continue the copy-editing, if you reply to the questions and clarify
the intended behaviour of those parts of the API. (Or feel free to pick
and push any of these fixes immediately, if you prefer.)
- Heikki
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f7f726b5aec..bbcab9ce31a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3638,7 +3638,7 @@ static struct config_string ConfigureNamesString[] =
{"default_table_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT,
gettext_noop("Sets the default table access method for new tables."),
NULL,
- GUC_IS_NAME
+ GUC_NOT_IN_SAMPLE | GUC_IS_NAME
},
&default_table_access_method,
DEFAULT_TABLE_ACCESS_METHOD,
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 82de4cdcf2c..8aeeba38ca2 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -57,6 +57,8 @@ typedef struct TableScanDescData *TableScanDesc;
* pointer to this structure. The information here must be sufficient to
* properly initialize each new TableScanDesc as workers join the scan, and it
* must act as a information what to scan for those workers.
+ *
+ * This is stored in dynamic shared memory, so you can't use pointers here.
*/
typedef struct ParallelTableScanDescData
{
@@ -64,6 +66,11 @@ typedef struct ParallelTableScanDescData
bool phs_syncscan; /* report location to syncscan logic? */
bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? */
Size phs_snapshot_off; /* data for snapshot */
+
+ /*
+ * Table AM specific data follows. After the table AM specific data
+ * comes the snapshot data, at 'phs_snapshot_off'.
+ */
} ParallelTableScanDescData;
typedef struct ParallelTableScanDescData *ParallelTableScanDesc;
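To illustrate the layout: a table AM embeds this struct at the start of its own
parallel scan state, and the serialized snapshot is copied in after the
AM-specific part, at phs_snapshot_off. A rough sketch (the struct and field
names are hypothetical, modeled on the heap AM's ParallelBlockTableScanDescData):

    typedef struct MyAMParallelScanDescData
    {
        ParallelTableScanDescData base;     /* common part, must come first */
        BlockNumber         nblocks;        /* AM-specific scan state */
        pg_atomic_uint64    next_item;      /* next item to hand out */
        /* serialized snapshot data follows, at base.phs_snapshot_off */
    } MyAMParallelScanDescData;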
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 6fbfcb96c98..d4709563e7e 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -91,8 +91,9 @@ typedef enum TM_Result
* xmax is the outdating transaction's XID. If the caller wants to visit the
* replacement tuple, it must check that this matches before believing the
* replacement is really a match.
+ * HEIKKI: matches what? xmin, but that's specific to the heapam.
*
- * cmax is the outdating command's CID, but only when the failure code is
+ * cmax is the outdating command's CID. Only set when the failure code is
* TM_SelfModified (i.e., something in the current transaction outdated the
* tuple); otherwise cmax is zero. (We make this restriction because
* HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
@@ -133,7 +134,11 @@ typedef void (*IndexBuildCallback) (Relation index,
* returned by FormData_pg_am.amhandler.
*
* In most cases it's not appropriate to call the callbacks directly, use the
- * table_* wrapper functions instead.
+ * table_* wrapper functions instead. The descriptions of the callbacks here
+ * are written from the AM implementor's point of view. For more information
+ * on how to call them, see the wrapper functions. (If you update the comments
+ * on either a callback or a wrapper function, remember to also update the other
+ * one!)
*
* GetTableAmRoutine() asserts that required callbacks are filled in, remember
* to update when adding a callback.
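As a reminder of how these callbacks get wired up: an AM's handler function
returns a statically allocated TableAmRoutine, the same way the heap AM's
handler does. A minimal sketch for a hypothetical "myam" extension:

    static const TableAmRoutine myam_methods = {
        .type = T_TableAmRoutine,

        .slot_callbacks = myam_slot_callbacks,
        .scan_begin = myam_scan_begin,
        .scan_end = myam_scan_end,
        /* ... all the other required callbacks ... */
    };

    PG_FUNCTION_INFO_V1(myam_handler);

    Datum
    myam_handler(PG_FUNCTION_ARGS)
    {
        PG_RETURN_POINTER(&myam_methods);
    }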
@@ -179,6 +184,12 @@ typedef struct TableAmRoutine
*
* if temp_snap is true, the snapshot will need to be deallocated at
* scan_end.
+ *
+ * HEIKKI: table_scan_update_snapshot() changes the snapshot. That's
+ * a bit surprising for the AM, no? Can it be called when a scan is
+ * already in progress?
+ *
+ * HEIKKI: A flags bitmask argument would be more readable than 6 booleans
*/
TableScanDesc (*scan_begin) (Relation rel,
Snapshot snapshot,
@@ -194,6 +205,9 @@ typedef struct TableAmRoutine
/*
* Release resources and deallocate scan. If TableScanDesc.temp_snap,
* TableScanDesc.rs_snapshot needs to be unregistered.
+ *
+ * HEIKKI: I find this 'temp_snap' thing pretty weird. Can't the caller handle
+ * deregistering it?
*/
void (*scan_end) (TableScanDesc scan);
@@ -221,6 +235,11 @@ typedef struct TableAmRoutine
/*
* Estimate the size of shared memory needed for a parallel scan of this
* relation. The snapshot does not need to be accounted for.
+ *
+ * HEIKKI: If this returns X, then the parallelscan_initialize() call
+ * mustn't use more than X. So this is not just for optimization purposes,
+ * for example. Not sure how to phrase that, but could use some
+ * clarification.
*/
Size (*parallelscan_estimate) (Relation rel);
@@ -228,6 +247,8 @@ typedef struct TableAmRoutine
* Initialize ParallelTableScanDesc for a parallel scan of this relation.
* `pscan` will be sized according to parallelscan_estimate() for the same
* relation.
+ *
+ * HEIKKI: What does this return?
*/
Size (*parallelscan_initialize) (Relation rel,
ParallelTableScanDesc pscan);
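For illustration, the estimate/initialize pair for a hypothetical AM, reusing
the MyAMParallelScanDescData sketched above for relscan.h. (Whether initialize
is supposed to return the number of bytes actually used, as the heap AM appears
to do, is part of the question above.)

    static Size
    myam_parallelscan_estimate(Relation rel)
    {
        return sizeof(MyAMParallelScanDescData);
    }

    static Size
    myam_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
    {
        MyAMParallelScanDescData *myscan = (MyAMParallelScanDescData *) pscan;

        myscan->nblocks = RelationGetNumberOfBlocks(rel);
        pg_atomic_init_u64(&myscan->next_item, 0);

        return sizeof(MyAMParallelScanDescData);    /* bytes actually used */
    }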
@@ -246,18 +267,22 @@ typedef struct TableAmRoutine
*/
/*
- * Prepare to fetch tuples from the relation, as needed when fetching
- * tuples for an index scan. The callback has to return an
- * IndexFetchTableData, which the AM will typically embed in a larger
- * structure with additional information.
+ * Prepare to fetch tuples from the relation, for an index scan. The
+ * callback has to return an IndexFetchTableData, which the AM will
+ * typically embed in a larger structure with additional information.
*
- * Tuples for an index scan can then be fetched via index_fetch_tuple.
+ * After this, the caller will call index_fetch_tuple(), as many times as
+ * needed, to fetch the tuples.
*/
struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
/*
* Reset index fetch. Typically this will release cross index fetch
* resources held in IndexFetchTableData.
+ *
+ * HEIKKI: Is this called between every call to index_fetch_tuple()?
+ * Between every call to index_fetch_tuple(), except when call_again is
+ * set? Can it be a no-op?
*/
void (*index_fetch_reset) (struct IndexFetchTableData *data);
@@ -272,19 +297,22 @@ typedef struct TableAmRoutine
* test, return true, false otherwise.
*
* Note that AMs that do not necessarily update indexes when indexed
- * columns do not change, need to return the current/correct version of
+ * columns don't change, need to return the current/correct version of
* the tuple that is visible to the snapshot, even if the tid points to an
* older version of the tuple.
*
* *call_again is false on the first call to index_fetch_tuple for a tid.
- * If there potentially is another tuple matching the tid, *call_again
- * needs be set to true by index_fetch_tuple, signalling to the caller
+ * If there potentially is another tuple matching the tid, the callback
+ * needs to set *call_again to true, signalling to the caller
* that index_fetch_tuple should be called again for the same tid.
*
* *all_dead, if all_dead is not NULL, should be set to true by
* index_fetch_tuple iff it is guaranteed that no backend needs to see
- * that tuple. Index AMs can use that do avoid returning that tid in
+ * that tuple. Index AMs can use that to avoid returning that tid in
* future searches.
+ *
+ * HEIKKI: Should the snapshot be given in index_fetch_begin()? Can it
+ * differ between calls?
*/
bool (*index_fetch_tuple) (struct IndexFetchTableData *scan,
ItemPointer tid,
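For reference, the caller-side protocol behind these three callbacks looks
roughly like this (a simplified sketch of what index_getnext_slot() does
through the table_index_fetch_* wrappers; 'tid', 'snapshot' and 'slot' come
from the enclosing index scan, and recheck/error handling is omitted):

    IndexFetchTableData *fetch = table_index_fetch_begin(rel);
    bool        call_again;
    bool        all_dead;
    bool        found;

    /* for each TID returned by the index AM ... */
    call_again = false;
    do
    {
        found = table_index_fetch_tuple(fetch, &tid, snapshot, slot,
                                        &call_again, &all_dead);
    } while (!found && call_again);

    table_index_fetch_end(fetch);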
@@ -302,6 +330,8 @@ typedef struct TableAmRoutine
* Fetch tuple at `tid` into `slot`, after doing a visibility test
* according to `snapshot`. If a tuple was found and passed the visibility
* test, returns true, false otherwise.
+ *
+ * HEIKKI: explain how this differs from index_fetch_tuple.
*/
bool (*tuple_fetch_row_version) (Relation rel,
ItemPointer tid,
@@ -311,14 +341,17 @@ typedef struct TableAmRoutine
/*
* Return the latest version of the tuple at `tid`, by updating `tid` to
* point at the newest version.
+ *
+ * HEIKKI: the latest version visible to the snapshot?
*/
void (*tuple_get_latest_tid) (Relation rel,
Snapshot snapshot,
ItemPointer tid);
/*
- * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
- * the appropriate type for the AM.
+ * Does the tuple in `slot` satisfy `snapshot`?
+ *
+ * The AM may modify the data underlying the tuple as a side-effect.
*/
bool (*tuple_satisfies_snapshot) (Relation rel,
TupleTableSlot *slot,
@@ -413,8 +446,8 @@ typedef struct TableAmRoutine
*/
/*
- * This callback needs to create a new relation filenode for `rel`, with
- * appropriate durability behaviour for `persistence`.
+ * Create a new relation filenode for `rel`, with appropriate durability
+ * behaviour for `persistence`.
*
* On output *freezeXid, *minmulti must be set to the values appropriate
* for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
@@ -429,24 +462,40 @@ typedef struct TableAmRoutine
MultiXactId *minmulti);
/*
- * This callback needs to remove all contents from `rel`'s current
- * relfilenode. No provisions for transactional behaviour need to be made.
- * Often this can be implemented by truncating the underlying storage to
- * its minimal size.
- *
- * See also table_relation_nontransactional_truncate().
+ * Remove all rows from `rel`'s current relfilenode. No provisions for
+ * transactional behaviour need to be made. Often this can be implemented
+ * by truncating the underlying storage to its minimal size.
*/
void (*relation_nontransactional_truncate) (Relation rel);
/*
- * See table_relation_copy_data().
+ * Copy data from `rel` into the new relfilenode `newrnode`. The new
+ * relfilenode might not have storage associated with it before this
+ * callback is called. This is used for low level operations like
+ * changing a relation's tablespace.
*
* This can typically be implemented by directly copying the underlying
* storage, unless it contains references to the tablespace internally.
*/
void (*relation_copy_data) (Relation rel, RelFileNode newrnode);
- /* See table_relation_copy_for_cluster() */
+ /*
+ * Copy all data from `OldHeap` into `NewHeap`, as part of a CLUSTER or
+ * VACUUM FULL.
+ *
+ * If `OldIndex` is valid, the data should be ordered according to the
+ * given index. If `use_sort` is false, the data should be fetched from the
+ * index, otherwise it should be fetched from the old table and sorted.
+ *
+ * OldestXmin, FreezeXid, MultiXactCutoff are currently valid values for
+ * the table.
+ * HEIKKI: What does "currently valid" mean? Valid for the old table?
+ *
+ * The callback should set *num_tuples, *tups_vacuumed, *tups_recently_dead
+ * to statistics computed while copying the relation's data. Not all of them
+ * might make sense for every AM.
+ * HEIKKI: What to do for the ones that don't make sense? Set to 0?
+ */
void (*relation_copy_for_cluster) (Relation NewHeap,
Relation OldHeap,
Relation OldIndex,
@@ -466,9 +515,8 @@ typedef struct TableAmRoutine
* On entry a transaction is already established, and the relation is
* locked with a ShareUpdateExclusive lock.
*
- * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
- * this routine, even if (for ANALYZE) it is part of the same VACUUM
- * command.
+ * Note that VACUUM FULL (or CLUSTER) does not use this callback.
+ * Neither does ANALYZE, even if it is part of the same VACUUM command.
*
* There probably, in the future, needs to be a separate callback to
* integrate with autovacuum's scheduling.
@@ -479,13 +527,13 @@ typedef struct TableAmRoutine
/*
* Prepare to analyze block `blockno` of `scan`. The scan has been started
- * with table_beginscan_analyze(). See also
- * table_scan_analyze_next_block().
+ * with table_beginscan_analyze().
*
* The callback may acquire resources like locks that are held until
- * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
+ * table_scan_analyze_next_tuple() returns false. For example, it can make sense
* to hold a lock until all tuples on a block have been analyzed by
* scan_analyze_next_tuple.
+ * HEIKKI: Hold a lock on what? A lwlock on the page?
*
* The callback can return false if the block is not suitable for
* sampling, e.g. because it's a metapage that could never contain tuples.
@@ -589,8 +637,8 @@ typedef struct TableAmRoutine
struct TBMIterateResult *tbmres);
/*
- * Fetch the next tuple of a bitmap table scan into `slot` and return true
- * if a visible tuple was found, false otherwise.
+ * Fetch the next visible tuple of a bitmap table scan into `slot`. If a
+ * tuple was found, returns true, false otherwise.
*
* For some AMs it will make more sense to do all the work referencing
* `tbmres` contents in scan_bitmap_next_block, for others it might be
@@ -618,6 +666,8 @@ typedef struct TableAmRoutine
* internally needs to perform mapping between the internal and a block
* based representation.
*
+ * HEIKKI: What TsmRoutine? Where is that?
+ *
* Note that it's not acceptable to hold deadlock prone resources such as
* lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
* block - the tuple is likely to be returned to an upper query node, and
@@ -632,9 +682,11 @@ typedef struct TableAmRoutine
struct SampleScanState *scanstate);
/*
- * This callback, only called after scan_sample_next_block has returned
- * true, should determine the next tuple to be returned from the selected
- * block using the TsmRoutine's NextSampleTuple() callback.
+ * Return the next tuple in a sample scan.
+ *
+ * This callback will only be called after scan_sample_next_block has
+ * returned true. It should determine the next tuple to be returned from
+ * the selected block using the TsmRoutine's NextSampleTuple() callback.
*
* The callback needs to perform visibility checks, and only return
* visible tuples. That obviously can mean calling NextSampletuple()
@@ -657,15 +709,15 @@ typedef struct TableAmRoutine
*/
/*
- * Returns slot callbacks suitable for holding tuples of the appropriate type
+ * Return slot callbacks suitable for holding tuples of the appropriate type
* for the relation. Works for tables, views, foreign tables and partitioned
* tables.
*/
extern const TupleTableSlotOps *table_slot_callbacks(Relation rel);
/*
- * Returns slot using the callbacks returned by table_slot_callbacks(), and
- * registers it on *reglist.
+ * Return a slot using the callbacks returned by table_slot_callbacks(), and
+ * register it on *reglist.
*/
extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);
@@ -676,8 +728,8 @@ extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);
*/
/*
- * Start a scan of `rel`. Returned tuples pass a visibility test of
- * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
+ * Start a scan of `rel`. Returned tuples are visible according to `snapshot`,
+ * and if nkeys != 0, the results are filtered by those scan keys.
*/
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
@@ -688,18 +740,24 @@ table_beginscan(Relation rel, Snapshot snapshot,
}
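For illustration, a bare-bones sequential scan through these wrappers looks
something like this (sketch only; 'rel' is open and 'snapshot' is a registered
snapshot):

    TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* process the tuple stored in 'slot' */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);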
/*
- * Like table_beginscan(), but for scanning catalog. It'll automatically use a
- * snapshot appropriate for scanning catalog relations.
+ * Like table_beginscan(), but for scanning a system catalog. It will
+ * automatically use a snapshot appropriate for scanning catalog relations.
*/
extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
struct ScanKeyData *key);
/*
* Like table_beginscan(), but table_beginscan_strat() offers an extended API
- * that lets the caller control whether a nondefault buffer access strategy
- * can be used, and whether syncscan can be chosen (possibly resulting in the
- * scan not starting from block zero). Both of these default to true with
- * plain table_beginscan.
+ * that lets the caller use a non-default buffer access strategy, or
+ * specify that a synchronized scan can be used (possibly resulting in the
+ * scan not starting from block zero). Both of these default to true, as
+ * with plain table_beginscan.
+ *
+ * HEIKKI: I'm a bit confused by 'allow_strat'. What is the non-default
+ * strategy that will get used if you pass allow_strat=true? Perhaps the flag
+ * should be called "use_bulkread_strategy"? Or it should be of type
+ * BufferAccessStrategyType, or the caller should create a strategy with
+ * GetAccessStrategy() and pass that.
*/
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
@@ -712,10 +770,10 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
}
/*
- * table_beginscan_bm is an alternative entry point for setting up a
- * TableScanDesc for a bitmap heap scan. Although that scan technology is
- * really quite unlike a standard seqscan, there is just enough commonality to
- * make it worth using the same data structure.
+ * table_beginscan_bm() is an alternative entry point for setting up a
+ * TableScanDesc for a bitmap heap scan. Although a bitmap scan is
+ * really quite unlike a standard seqscan, there is just enough commonality
+ * that it makes sense to use a TableScanDesc for both.
*/
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
@@ -726,11 +784,13 @@ table_beginscan_bm(Relation rel, Snapshot snapshot,
}
/*
- * table_beginscan_sampling is an alternative entry point for setting up a
+ * table_beginscan_sampling() is an alternative entry point for setting up a
* TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
* using the same data structure although the behavior is rather different.
* In addition to the options offered by table_beginscan_strat, this call
* also allows control of whether page-mode visibility checking is used.
+ *
+ * HEIKKI: What is 'pagemode'?
*/
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
@@ -973,9 +1033,6 @@ table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
*
* This assumes the slot's tuple is valid, and of the appropriate type for the
* AM.
- *
- * Some AMs might modify the data underlying the tuple as a side-effect. If so
- * they ought to mark the relevant buffer dirty.
*/
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
@@ -1004,31 +1061,33 @@ table_compute_xid_horizon_for_tuples(Relation rel,
*/
/*
- * Insert a tuple from a slot into table AM routine.
+ * Insert a tuple from a slot into the table.
*
- * The options bitmask allows to specify options that allow to change the
- * behaviour of the AM. Several options might be ignored by AMs not supporting
- * them.
+ * The options bitmask allows changing the behaviour of the AM. The AM may
+ * ignore options that it does not support.
*
* If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple doesn't
* need to be logged to WAL, even for a non-temp relation. It is the AMs
* choice whether this optimization is supported.
*
- * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
- * free space in the relation. This can save some cycles when we know the
+ * The TABLE_INSERT_SKIP_FSM option is a hint that the AM should not bother
+ * to try reusing free space. This can save some cycles when we know the
* relation is new and doesn't contain useful amounts of free space. It's
* commonly passed directly to RelationGetBufferForTuple, see for more info.
*
- * TABLE_INSERT_FROZEN should only be specified for inserts into
- * relfilenodes created during the current subtransaction and when
- * there are no prior snapshots or pre-existing portals open.
- * This causes rows to be frozen, which is an MVCC violation and
- * requires explicit options chosen by user.
+ * TABLE_INSERT_FROZEN means that the AM may insert the tuples as already
+ * frozen, so that they become immediately visible to everyone. That violates
+ * the normal MVCC rules, and therefore requires an explicit request from the
+ * user; currently it is only used for the COPY FREEZE option. Even then, it
+ * should only be specified for inserts into relfilenodes created during the
+ * current subtransaction and when there are no prior snapshots or
+ * pre-existing portals open.
*
* TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
* information for the tuple. This should solely be used during table rewrites
* where RelationIsLogicallyLogged(relation) is not yet accurate for the new
* relation.
+ * HEIKKI: Is this optional, too? Can the AM ignore it?
*
* Note that most of these options will be applied when inserting into the
* heap's TOAST table, too, if the tuple requires any out-of-line data.
@@ -1041,6 +1100,8 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* On return the slot's tts_tid and tts_tableOid are updated to reflect the
* insertion. But note that any toasting of fields within the slot is NOT
* reflected in the slots contents.
+ *
+ * HEIKKI: I think GetBulkInsertState() should be an AM-specific callback.
*/
static inline void
table_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
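For example, a bulk load in the style of COPY would use this roughly as follows
(sketch; GetBulkInsertState()/FreeBulkInsertState() are the heap-specific
functions mentioned in the note above):

    BulkInsertState bistate = GetBulkInsertState();
    int         options = TABLE_INSERT_SKIP_FSM;    /* e.g. a brand-new relfilenode */

    table_insert(rel, slot, GetCurrentCommandId(true), options, bistate);

    table_finish_bulk_insert(rel, options);
    FreeBulkInsertState(bistate);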
@@ -1089,9 +1150,8 @@ table_complete_speculative(Relation rel, TupleTableSlot *slot,
* operation. That's often faster than calling table_insert() in a loop,
* because e.g. the AM can reduce WAL logging and page locking overhead.
*
- * Except for taking `nslots` tuples as input, as an array of TupleTableSlots
- * in `slots`, the parameters for table_multi_insert() are the same as for
- * table_insert().
+ * The parameters are the same as for table_insert(), except for taking
+ * an array of slots.
*
* Note: this leaks memory into the current memory context. You can create a
* temporary context before calling this, if that's a problem.
@@ -1115,20 +1175,25 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* tid - TID of tuple to be deleted
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
+ * HEIKKI: description for 'snapshot' parameter is missing
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
- * Output parameters:
- * tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
+ * Output parameters:
+ * tmfd - filled in failure cases (see below)
+ *
+ * HEIKKI: What's the AM supposed to do differently if 'changingPart' is set?
+ * (I know, it's supposed to set the t_ctid to the magic "moved partition"
+ * value. Explain that)
*
* Normal, successful return value is TM_Ok, which means we did actually
- * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
- * TM_BeingModified (the last only possible if wait == false).
+ * delete it. Failure return codes are TM_SelfModified, TM_Updated,
+ * TM_Deleted, and TM_BeingModified (the last only possible if wait == false).
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
- * t_xmax, and, if possible, and, if possible, t_cmax. See comments for
- * struct TM_FailureData for additional info.
+ * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
+ * for additional info.
*/
static inline TM_Result
table_delete(Relation rel, ItemPointer tid, CommandId cid,
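To make the failure codes concrete, a caller typically handles the result along
these lines (a rough sketch of what ExecDelete() does; 'tid', 'cid' and
'snapshot' come from the executor, and most details are omitted):

    TM_FailureData  tmfd;
    TM_Result       result;

    result = table_delete(rel, &tid, cid, snapshot,
                          InvalidSnapshot,  /* crosscheck */
                          true,             /* wait */
                          &tmfd,
                          false);           /* changingPart */
    switch (result)
    {
        case TM_Ok:
            break;          /* deleted */
        case TM_SelfModified:
            /* outdated by this transaction; tmfd.cmax says by which command */
            break;
        case TM_Updated:
        case TM_Deleted:
            /* concurrent update/delete; tmfd.ctid and tmfd.xmax describe it */
            break;
        case TM_BeingModified:
            /* only possible when wait == false */
            break;
        default:
            break;
    }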
@@ -1161,8 +1226,8 @@ table_delete(Relation rel, ItemPointer tid, CommandId cid,
* are required for this tuple
*
* Normal, successful return value is TM_Ok, which means we did actually
- * update it. Failure return codes are TM_SelfModified, TM_Updated, and
- * TM_BeingModified (the last only possible if wait == false).
+ * update it. Failure return codes are TM_SelfModified, TM_Updated,
+ * TM_Deleted, and TM_BeingModified (the last only possible if wait == false).
*
* On success, the slot's tts_tid and tts_tableOid are updated to match the new
* stored tuple; in particular, slot->tts_tid is set to the TID where the
@@ -1170,6 +1235,9 @@ table_delete(Relation rel, ItemPointer tid, CommandId cid,
* update was done. However, any TOAST changes in the new tuple's
* data are not reflected into *newtup.
*
+ * HEIKKI: There is no 'newtup'.
+ * HEIKKI: HEAP_ONLY_TUPLE is AM-specific; do the callers peek into that, currently?
+ *
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
* for additional info.
@@ -1233,8 +1301,8 @@ table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot,
/*
* Perform operations necessary to complete insertions made via
* tuple_insert and multi_insert with a BulkInsertState specified. This
- * e.g. may e.g. used to flush the relation when inserting with
- * TABLE_INSERT_SKIP_WAL specified.
+ * may e.g. be used to flush the relation when the TABLE_INSERT_SKIP_WAL
+ * option was used.
*/
static inline void
table_finish_bulk_insert(Relation rel, int options)
@@ -1257,8 +1325,8 @@ table_finish_bulk_insert(Relation rel, int options)
* This is used both during relation creation and various DDL operations to
* create a new relfilenode that can be filled from scratch.
*
- * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
- * that pg_class.{relfrozenxid, relminmxid} have to be set to.
+ * The function sets *freezeXid, *minmulti to the xid / multixact horizon
+ * values that the table's pg_class.{relfrozenxid, relminmxid} have to be set to.
*/
static inline void
table_relation_set_new_filenode(Relation rel, char persistence,
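From the AM's side, a minimal implementation of the underlying callback could
look like this (hypothetical AM that has no use for xid-based visibility;
signature as in this header, other details omitted):

    static void
    myam_relation_set_new_filenode(Relation rel, char persistence,
                                   TransactionId *freezeXid,
                                   MultiXactId *minmulti)
    {
        /* this AM doesn't need relfrozenxid/relminmxid tracking */
        *freezeXid = InvalidTransactionId;
        *minmulti = InvalidMultiXactId;

        RelationCreateStorage(rel->rd_node, persistence);
    }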
@@ -1271,7 +1339,7 @@ table_relation_set_new_filenode(Relation rel, char persistence,
/*
* Remove all table contents from `rel`, in a non-transactional manner.
- * Non-transactional meaning that there's no need to support rollbacks. This
+ * Non-transactional, meaning that this cannot be rolled back. This
* commonly only is used to perform truncations for relfilenodes created in the
* current transaction.
*/
@@ -1294,20 +1362,19 @@ table_relation_copy_data(Relation rel, RelFileNode newrnode)
}
/*
- * Copy data from `OldHeap` into `NewHeap`, as part of a CLUSTER or VACUUM
- * FULL.
+ * Copy all data from `OldHeap` into `NewHeap`, as part of a CLUSTER or
+ * VACUUM FULL.
*
- * If `use_sort` is true, the table contents are sorted appropriate for
- * `OldIndex`; if use_sort is false and OldIndex is not InvalidOid, the data
- * is copied in that index's order; if use_sort is false and OidIndex is
- * InvalidOid, no sorting is performed.
+ * If `OldIndex` is valid, the data should be ordered according to the
+ * given index. If `use_sort` is false, the data should be fetched from the
+ * index, otherwise it should be fetched from the old table and sorted.
*
* OldestXmin, FreezeXid, MultiXactCutoff must be currently valid values for
* the table.
*
- * *num_tuples, *tups_vacuumed, *tups_recently_dead will contain statistics
- * computed while copying for the relation. Not all might make sense for every
- * AM.
+ * The function sets *num_tuples, *tups_vacuumed, *tups_recently_dead to
+ * statistics computed while copying the relation's data. Not all of them
+ * might make sense for every AM.
*/
static inline void
table_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
@@ -1369,7 +1436,7 @@ table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
* is stored in `slot`.
*
* *liverows and *deadrows are incremented according to the encountered
- * tuples.
+ * tuples, if the AM has the concept of live and dead tuples.
*/
static inline bool
table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
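For context, acquire_sample_rows() drives these two wrappers roughly like this
(simplified sketch; the choice of 'blockno', 'bstrategy' and 'OldestXmin', and
the row sampling itself, are omitted):

    TableScanDesc   scan = table_beginscan_analyze(rel);
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    double          liverows = 0;
    double          deadrows = 0;

    /* for each block chosen by the sampler ... */
    if (table_scan_analyze_next_block(scan, blockno, bstrategy))
    {
        while (table_scan_analyze_next_tuple(scan, OldestXmin,
                                             &liverows, &deadrows, slot))
        {
            /* feed the tuple in 'slot' to the row sampler */
        }
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);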
@@ -1384,34 +1451,36 @@ table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
/*
* table_index_build_scan - scan the table to find tuples to be indexed
*
- * This is called back from an access-method-specific index build procedure
- * after the AM has done whatever setup it needs. The parent heap relation
- * is scanned to find tuples that should be entered into the index. Each
- * such tuple is passed to the AM's callback routine, which does the right
- * things to add it to the new index. After we return, the AM's index
- * build procedure does whatever cleanup it needs.
+ * This is called by the index-AM-specific index build procedure, after the
+ * index AM has done whatever setup it needs. This function scans the parent
+ * table, to find tuples that should be entered into the index. Each such tuple
+ * is passed to the index AM's callback routine, which does the right things to
+ * add it to the new index. After this returns, the index AM's index build
+ * procedure does whatever cleanup it needs.
*
* The total count of live tuples is returned. This is for updating pg_class
* statistics. (It's annoying not to be able to do that here, but we want to
* merge that update with others; see index_update_stats.) Note that the
* index AM itself must keep track of the number of index tuples; we don't do
- * so here because the AM might reject some of the tuples for its own reasons,
+ * so here because the index AM might reject some of the tuples for its own reasons,
* such as being unable to store NULLs.
*
- * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
+ * If 'progress' is true, the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
* starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
*
- * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
+ * A side effect is to set index_info->ii_BrokenHotChain to true if we detect
* any potentially broken HOT chains. Currently, we set this if there are any
* RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
* very hard to detect whether they're really incompatible with the chain tip.
* This only really makes sense for heap AM, it might need to be generalized
* for other AMs later.
+ *
+ * HEIKKI: What does 'allow_sync' do?
*/
static inline double
table_index_build_scan(Relation heap_rel,
Relation index_rel,
- struct IndexInfo *index_nfo,
+ struct IndexInfo *index_info,
bool allow_sync,
bool progress,
IndexBuildCallback callback,
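To make the callback relationship concrete: the index AM passes a routine with
the IndexBuildCallback signature (the typedef near the top of this header), and
the scan hands every tuple that should be indexed to it. A skeleton with
hypothetical names, modeled loosely on nbtree's build callback:

    static void
    myindex_build_callback(Relation index, HeapTuple htup, Datum *values,
                           bool *isnull, bool tupleIsAlive, void *state)
    {
        /* 'state' is the callback_state passed to table_index_build_scan() */
        /* form an index tuple from values/isnull and insert it into 'index' */
    }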
@@ -1420,7 +1489,7 @@ table_index_build_scan(Relation heap_rel,
{
return heap_rel->rd_tableam->index_build_range_scan(heap_rel,
index_rel,
- index_nfo,
+ index_info,
allow_sync,
false,
progress,
@@ -1432,12 +1501,12 @@ table_index_build_scan(Relation heap_rel,
}
/*
- * As table_index_build_scan(), except that instead of scanning the complete
- * table, only the given number of blocks are scanned. Scan to end-of-rel can
+ * As table_index_build_scan(), but instead of scanning the complete
+ * table, only the given block range is scanned. Scan to end-of-rel can
* be signalled by passing InvalidBlockNumber as numblocks. Note that
* restricting the range to scan cannot be done when requesting syncscan.
*
- * When "anyvisible" mode is requested, all tuples visible to any transaction
+ * When 'anyvisible' mode is requested, all tuples visible to any transaction
* are indexed and counted as live, including those inserted or deleted by
* transactions that are still in progress.
*/