On 7.1.2013 10:07, Shigeru Hanada wrote:
> Hi Tomas,
>
> I reviewed v6 patch, and found that several places need fix.
> Sorry for extra nitpickings.
>
> * I found another extra space after asterisk.
>
> + RelFileNode * nodes;
Thanks, fixed.
>
> * Curly bracket which starts code block should be at the head of next line.
>
> + /* extend the array if needed (double the size)
> */
> + if (maxrels <= nrels) {
Fixed.
>
> * There are several trailing tabs in src/backend/catalog/storage.c.
Fixed (I balieve).
> * naming of DROP_RELATIONS_BSEARCH_LIMIT (or off-by-one bug?)
> IIUC bsearch is used when # of relations to be dropped is *more* than
> the value of DROP_RELATIONS_BSEARCH_LIMIT (10). IMO this behavior is
> not what the macro name implies; I thought that bsearch is used for 10
> relations. Besides, the word "LIMIT" is used as *upper limit* in
> other macros. How about MIN_DROP_REL[ATION]S_BSEARCH or
> DROP_REL[ATION]S_LINEAR_SEARCH_LIMIT?
> # using RELS instead of RELATIONS would be better to shorten the name
>
I've changed the name to DROP_RELS_BSEARCH_THRESHOLD. I believe this
naming is consistent with options like "geqo_threshold" - when the
number of relations reaches the specified value, the bsearch is used.
I've also increased the value from 10 to 20, in accordance with the
previous discussion.
>
> * +1 for Alvaro's suggestion about avoiding palloc traffic by starting
> with 8 elements or so.
>
Done.
regards
Tomas
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 8dc622a..d8dabc6 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -312,6 +312,11 @@ smgrDoPendingDeletes(bool isCommit)
PendingRelDelete *pending;
PendingRelDelete *prev;
PendingRelDelete *next;
+
+ SMgrRelation *srels = palloc(sizeof(SMgrRelation));
+ int nrels = 0,
+ i = 0,
+ maxrels = 8;
prev = NULL;
for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -335,14 +340,32 @@ smgrDoPendingDeletes(bool isCommit)
SMgrRelation srel;
srel = smgropen(pending->relnode, pending->backend);
- smgrdounlink(srel, false);
- smgrclose(srel);
+
+ /* extend the array if needed (double the size) */
+ if (maxrels <= nrels)
+ {
+ maxrels *= 2;
+ srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
+ }
+
+ srels[nrels++] = srel;
}
/* must explicitly free the list entry */
pfree(pending);
/* prev does not change */
}
}
+
+ if (nrels > 0)
+ {
+ smgrdounlinkall(srels, nrels, false);
+
+ for (i = 0; i < nrels; i++)
+ smgrclose(srels[i]);
+ }
+
+ pfree(srels);
+
}
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index dddb6c0..1665630 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -62,6 +62,7 @@
#define BUF_WRITTEN 0x01
#define BUF_REUSABLE 0x02
+#define DROP_RELS_BSEARCH_THRESHOLD 15
/* GUC variables */
bool zero_damaged_pages = false;
@@ -108,6 +109,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
static void AtProcExit_Buffers(int code, Datum arg);
+static int rnode_comparator(const void * p1, const void * p2);
/*
* PrefetchBuffer -- initiate asynchronous read of a block of a relation
@@ -2094,35 +2096,87 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
* --------------------------------------------------------------------
*/
void
-DropRelFileNodeAllBuffers(RelFileNodeBackend rnode)
+DropRelFileNodeAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
{
- int i;
+ int i, j, n = 0;
+ RelFileNode *nodes;
+
+ nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */
/* If it's a local relation, it's localbuf.c's problem. */
- if (RelFileNodeBackendIsTemp(rnode))
+ for (i = 0; i < nnodes; i++)
{
- if (rnode.backend == MyBackendId)
- DropRelFileNodeAllLocalBuffers(rnode.node);
+ if (RelFileNodeBackendIsTemp(rnodes[i]))
+ {
+ if (rnodes[i].backend == MyBackendId)
+ DropRelFileNodeAllLocalBuffers(rnodes[i].node);
+ }
+ else
+ {
+ nodes[n++] = rnodes[i].node;
+ }
+ }
+
+ /*
+ * If there are no non-local relations, then we're done. Release the memory
+ * and return.
+ */
+ if (n == 0)
+ {
+ pfree(nodes);
return;
}
+ /* sort the list of rnodes (but only if we're going to use the bsearch) */
+ if (n > DROP_RELS_BSEARCH_THRESHOLD)
+ pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator);
+
for (i = 0; i < NBuffers; i++)
{
+ RelFileNode *rnode = NULL;
volatile BufferDesc *bufHdr = &BufferDescriptors[i];
-
+
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
* and saves some cycles.
*/
- if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+
+ /*
+ * For low number of relations to drop just use a simple walk through,
+ * to save the bsearch overhead. The BSEARCH_LIMIT is rather a guess
+ * than a exactly determined value, as it depends on many factors (CPU
+ * and RAM speeds, amount of shared buffers etc.).
+ */
+ if (n < DROP_RELS_BSEARCH_THRESHOLD)
+ {
+ for (j = 0; j < n; j++)
+ {
+ if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j]))
+ {
+ rnode = &nodes[j];
+ break;
+ }
+ }
+ }
+ else
+ {
+ rnode = bsearch((const void *) &(bufHdr->tag.rnode),
+ nodes, n, sizeof(RelFileNode),
+ rnode_comparator);
+ }
+
+ /* buffer does not belong to any of the relations */
+ if (rnode == NULL)
continue;
LockBufHdr(bufHdr);
- if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+ if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode)))
InvalidateBuffer(bufHdr); /* releases spinlock */
else
UnlockBufHdr(bufHdr);
}
+
+ pfree(nodes);
}
/* ---------------------------------------------------------------------
@@ -2953,3 +3007,31 @@ local_buffer_write_error_callback(void *arg)
pfree(path);
}
}
+
+/*
+ * Used to sort relfilenode array (ordered by [relnode, dbnode, spcnode]), so
+ * that it's suitable for bsearch.
+ */
+static int
+rnode_comparator(const void *p1, const void *p2)
+{
+ RelFileNode n1 = *(RelFileNode *) p1;
+ RelFileNode n2 = *(RelFileNode *) p2;
+
+ if (n1.relNode < n2.relNode)
+ return -1;
+ else if (n1.relNode > n2.relNode)
+ return 1;
+
+ if (n1.dbNode < n2.dbNode)
+ return -1;
+ else if (n1.dbNode > n2.dbNode)
+ return 1;
+
+ if (n1.spcNode < n2.spcNode)
+ return -1;
+ else if (n1.spcNode > n2.spcNode)
+ return 1;
+ else
+ return 0;
+}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 5dff8b3..f98ba27 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -390,7 +390,7 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
* Get rid of any remaining buffers for the relation. bufmgr will just
* drop them without bothering to write the contents.
*/
- DropRelFileNodeAllBuffers(rnode);
+ DropRelFileNodeAllBuffers(&rnode, 1);
/*
* It'd be nice to tell the stats collector to forget it immediately, too.
@@ -419,6 +419,69 @@ smgrdounlink(SMgrRelation reln, bool isRedo)
(*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo);
}
+void
+smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
+{
+ int i = 0;
+ RelFileNodeBackend *rnodes;
+ ForkNumber forknum;
+
+ /* initialize an array which contains all relations to be dropped */
+ rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
+ for (i = 0; i < nrels; i++)
+ {
+ RelFileNodeBackend rnode = rels[i]->smgr_rnode;
+ int which = rels[i]->smgr_which;
+
+ rnodes[i] = rnode;
+
+ /* Close the forks at smgr level */
+ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ (*(smgrsw[which].smgr_close)) (rels[i], forknum);
+ }
+
+ /*
+ * Get rid of any remaining buffers for the relation. bufmgr will just
+ * drop them without bothering to write the contents.
+ */
+ DropRelFileNodeAllBuffers(rnodes, nrels);
+
+ /*
+ * It'd be nice to tell the stats collector to forget it immediately, too.
+ * But we can't because we don't know the OID (and in cases involving
+ * relfilenode swaps, it's not always clear which table OID to forget,
+ * anyway).
+ */
+
+ /*
+ * Send a shared-inval message to force other backends to close any
+ * dangling smgr references they may have for this rel. We should do this
+ * before starting the actual unlinking, in case we fail partway through
+ * that step. Note that the sinval message will eventually come back to
+ * this backend, too, and thereby provide a backstop that we closed our
+ * own smgr rel.
+ */
+ for (i = 0; i < nrels; i++)
+ CacheInvalidateSmgr(rnodes[i]);
+
+ /*
+ * Delete the physical file(s).
+ *
+ * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+ * ERROR, because we've already decided to commit or abort the current
+ * xact.
+ */
+
+ for (i = 0; i < nrels; i++)
+ {
+ int which = rels[i]->smgr_which;
+ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ (*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo);
+ }
+
+ pfree(rnodes);
+}
+
/*
* smgrdounlinkfork() -- Immediately unlink one fork of a relation.
*
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 51eb77b..1deee42 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -188,7 +188,7 @@ extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
ForkNumber forkNum, BlockNumber firstDelBlock);
-extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
+extern void DropRelFileNodeAllBuffers(RelFileNodeBackend * rnodes, int nnodes);
extern void DropDatabaseBuffers(Oid dbid);
#define RelationGetNumberOfBlocks(reln) \
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 9eccf76..272476b 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -85,6 +85,7 @@ extern void smgrcloseall(void);
extern void smgrclosenode(RelFileNodeBackend rnode);
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void smgrdounlink(SMgrRelation reln, bool isRedo);
+extern void smgrdounlinkall(SMgrRelation * rels, int nrels, bool isRedo);
extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers