Zdenek Kotala wrote:
I tested your patch and it looks good. however I have several
comments/questions:
1) probes contains pointer but in probe.h it is declared as int. Is it
correct?
The probes cast the pointer to uintptr_t, so the correct type that will
work for both ILP32 and LP64 models is unsigned long. I've made the
correction from unsigned int to unsigned long. The reason uintptr_t is
not used in the probe definition is because it causes compilation
problem on Mac OS X. This is a known problem in the OS X's DTrace
implementation.
2) Maybe
TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
would be better. Because slru_errcause, slru_errno can contains garbage
in situation when everything goes fine. Same for write.
I've made the changes per your suggestion although one can argue that
the script can check arg0, and if it's true, avoid using arg1 and arg2
as they are meaningless.
I think it is committable for 8.4.
That would be awesome!
-Robert
Index: src/backend/access/transam/slru.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/slru.c,v
retrieving revision 1.45
diff -u -3 -p -r1.45 slru.c
--- src/backend/access/transam/slru.c 1 Jan 2009 17:23:36 -0000 1.45
+++ src/backend/access/transam/slru.c 8 Mar 2009 20:39:01 -0000
@@ -57,6 +57,7 @@
#include "storage/fd.h"
#include "storage/shmem.h"
#include "miscadmin.h"
+#include "pg_trace.h"
/*
@@ -372,6 +373,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen
{
SlruShared shared = ctl->shared;
+ TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok,
xid);
/* Outer loop handles restart if we must wait for someone else's I/O */
for (;;)
{
@@ -399,6 +401,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen
}
/* Otherwise, it's ready to use */
SlruRecentlyUsed(shared, slotno);
+ TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
return slotno;
}
@@ -446,6 +449,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen
SlruReportIOError(ctl, pageno, xid);
SlruRecentlyUsed(shared, slotno);
+ TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
return slotno;
}
}
@@ -470,6 +474,8 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl,
SlruShared shared = ctl->shared;
int slotno;
+ TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid);
+
/* Try to find the page while holding only shared lock */
LWLockAcquire(shared->ControlLock, LW_SHARED);
@@ -511,6 +517,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot
int pageno = shared->page_number[slotno];
bool ok;
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno);
+
/* If a write is in progress, wait for it to finish */
while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
shared->page_number[slotno] == pageno)
@@ -525,7 +533,10 @@ SimpleLruWritePage(SlruCtl ctl, int slot
if (!shared->page_dirty[slotno] ||
shared->page_status[slotno] != SLRU_PAGE_VALID ||
shared->page_number[slotno] != pageno)
+ {
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
return;
+ }
/*
* Mark the slot write-busy, and clear the dirtybit. After this point,
a
@@ -569,6 +580,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot
/* Now it's okay to ereport if we failed */
if (!ok)
SlruReportIOError(ctl, pageno, InvalidTransactionId);
+
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
}
/*
@@ -593,6 +606,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
SlruFileName(ctl, path, segno);
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path,
pageno, slotno);
+
/*
* In a crash-and-restart situation, it's possible for us to receive
* commands to set the commit status of transactions whose bits are in
@@ -607,6 +622,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
{
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -614,6 +630,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
(errmsg("file \"%s\" doesn't exist, reading as
zeroes",
path)));
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
return true;
}
@@ -622,6 +639,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
slru_errcause = SLRU_SEEK_FAILED;
slru_errno = errno;
close(fd);
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -631,6 +649,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
slru_errcause = SLRU_READ_FAILED;
slru_errno = errno;
close(fd);
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -638,9 +657,12 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa
{
slru_errcause = SLRU_CLOSE_FAILED;
slru_errno = errno;
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
+ TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
+
return true;
}
@@ -668,6 +690,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
char path[MAXPGPATH];
int fd = -1;
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno,
slotno);
+
/*
* Honor the write-WAL-before-data rule, if appropriate, so that we do
not
* write out data before associated WAL records. This is the same
action
@@ -753,6 +777,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
{
slru_errcause = SLRU_OPEN_FAILED;
slru_errno = errno;
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -781,6 +806,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
slru_errno = errno;
if (!fdata)
close(fd);
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -794,6 +820,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
slru_errno = errno;
if (!fdata)
close(fd);
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -808,6 +835,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
slru_errcause = SLRU_FSYNC_FAILED;
slru_errno = errno;
close(fd);
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
@@ -815,10 +843,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int p
{
slru_errcause = SLRU_CLOSE_FAILED;
slru_errno = errno;
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false,
slru_errcause, slru_errno);
return false;
}
}
+ TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, -1, -1);
return true;
}
Index: src/backend/executor/execScan.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/execScan.c,v
retrieving revision 1.44
diff -u -3 -p -r1.44 execScan.c
--- src/backend/executor/execScan.c 1 Jan 2009 17:23:41 -0000 1.44
+++ src/backend/executor/execScan.c 8 Mar 2009 20:39:02 -0000
@@ -21,6 +21,7 @@
#include "executor/executor.h"
#include "miscadmin.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno,
TupleDesc tupdesc);
@@ -60,6 +61,8 @@ ExecScan(ScanState *node,
qual = node->ps.qual;
projInfo = node->ps.ps_ProjInfo;
+ TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan
*)node->ps.plan)->scanrelid, (uintptr_t)accessMtd);
+
/*
* If we have neither a qual to check nor a projection to do, just skip
* all the overhead and return the raw scan tuple.
Index: src/backend/executor/nodeAgg.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeAgg.c,v
retrieving revision 1.164
diff -u -3 -p -r1.164 nodeAgg.c
--- src/backend/executor/nodeAgg.c 1 Jan 2009 17:23:41 -0000 1.164
+++ src/backend/executor/nodeAgg.c 8 Mar 2009 20:39:02 -0000
@@ -86,6 +86,7 @@
#include "utils/syscache.h"
#include "utils/tuplesort.h"
#include "utils/datum.h"
+#include "pg_trace.h"
/*
@@ -814,6 +815,8 @@ ExecAgg(AggState *node)
if (node->agg_done)
return NULL;
+ TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *)
node->ss.ps.plan)->aggstrategy);
+
/*
* Check to see if we're still projecting out tuples from a previous agg
* tuple (because there is a function-returning-set in the projection
Index: src/backend/executor/nodeGroup.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeGroup.c,v
retrieving revision 1.73
diff -u -3 -p -r1.73 nodeGroup.c
--- src/backend/executor/nodeGroup.c 1 Jan 2009 17:23:41 -0000 1.73
+++ src/backend/executor/nodeGroup.c 8 Mar 2009 20:39:02 -0000
@@ -24,6 +24,7 @@
#include "executor/executor.h"
#include "executor/nodeGroup.h"
+#include "pg_trace.h"
/*
@@ -49,6 +50,8 @@ ExecGroup(GroupState *node)
numCols = ((Group *) node->ss.ps.plan)->numCols;
grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx;
+ TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols);
+
/*
* Check to see if we're still projecting out tuples from a previous
group
* tuple (because there is a function-returning-set in the projection
Index: src/backend/executor/nodeHash.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v
retrieving revision 1.117
diff -u -3 -p -r1.117 nodeHash.c
--- src/backend/executor/nodeHash.c 1 Jan 2009 17:23:41 -0000 1.117
+++ src/backend/executor/nodeHash.c 8 Mar 2009 20:39:02 -0000
@@ -35,6 +35,7 @@
#include "utils/dynahash.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"
+#include "pg_trace.h"
static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
@@ -70,6 +71,8 @@ MultiExecHash(HashState *node)
ExprContext *econtext;
uint32 hashvalue;
+ TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node);
+
/* must provide our own instrumentation support */
if (node->ps.instrument)
InstrStartNode(node->ps.instrument);
Index: src/backend/executor/nodeHashjoin.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v
retrieving revision 1.97
diff -u -3 -p -r1.97 nodeHashjoin.c
--- src/backend/executor/nodeHashjoin.c 1 Jan 2009 17:23:41 -0000 1.97
+++ src/backend/executor/nodeHashjoin.c 8 Mar 2009 20:39:02 -0000
@@ -20,6 +20,7 @@
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
@@ -61,6 +62,8 @@ ExecHashJoin(HashJoinState *node)
uint32 hashvalue;
int batchno;
+ TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node);
+
/*
* get information from HashJoin node
*/
Index: src/backend/executor/nodeLimit.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeLimit.c,v
retrieving revision 1.36
diff -u -3 -p -r1.36 nodeLimit.c
--- src/backend/executor/nodeLimit.c 4 Mar 2009 10:55:00 -0000 1.36
+++ src/backend/executor/nodeLimit.c 8 Mar 2009 20:39:02 -0000
@@ -23,6 +23,7 @@
#include "executor/executor.h"
#include "executor/nodeLimit.h"
+#include "pg_trace.h"
static void recompute_limits(LimitState *node);
@@ -41,6 +42,8 @@ ExecLimit(LimitState *node)
TupleTableSlot *slot;
PlanState *outerPlan;
+ TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node);
+
/*
* get information from the node
*/
Index: src/backend/executor/nodeMaterial.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v
retrieving revision 1.65
diff -u -3 -p -r1.65 nodeMaterial.c
--- src/backend/executor/nodeMaterial.c 1 Jan 2009 17:23:42 -0000 1.65
+++ src/backend/executor/nodeMaterial.c 8 Mar 2009 20:39:02 -0000
@@ -24,6 +24,7 @@
#include "executor/executor.h"
#include "executor/nodeMaterial.h"
#include "miscadmin.h"
+#include "pg_trace.h"
/* ----------------------------------------------------------------
* ExecMaterial
@@ -45,6 +46,8 @@ ExecMaterial(MaterialState *node)
bool eof_tuplestore;
TupleTableSlot *slot;
+ TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node);
+
/*
* get state info from node
*/
Index: src/backend/executor/nodeMergejoin.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v
retrieving revision 1.94
diff -u -3 -p -r1.94 nodeMergejoin.c
--- src/backend/executor/nodeMergejoin.c 1 Jan 2009 17:23:42 -0000
1.94
+++ src/backend/executor/nodeMergejoin.c 8 Mar 2009 20:39:02 -0000
@@ -102,6 +102,7 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
+#include "pg_trace.h"
/*
@@ -565,6 +566,8 @@ ExecMergeJoin(MergeJoinState *node)
bool doFillOuter;
bool doFillInner;
+ TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node);
+
/*
* get information from node
*/
Index: src/backend/executor/nodeNestloop.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v
retrieving revision 1.50
diff -u -3 -p -r1.50 nodeNestloop.c
--- src/backend/executor/nodeNestloop.c 1 Jan 2009 17:23:42 -0000 1.50
+++ src/backend/executor/nodeNestloop.c 8 Mar 2009 20:39:02 -0000
@@ -24,6 +24,7 @@
#include "executor/execdebug.h"
#include "executor/nodeNestloop.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
/* ----------------------------------------------------------------
@@ -67,6 +68,8 @@ ExecNestLoop(NestLoopState *node)
List *otherqual;
ExprContext *econtext;
+ TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node);
+
/*
* get information from the node
*/
Index: src/backend/executor/nodeSetOp.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v
retrieving revision 1.28
diff -u -3 -p -r1.28 nodeSetOp.c
--- src/backend/executor/nodeSetOp.c 1 Jan 2009 17:23:42 -0000 1.28
+++ src/backend/executor/nodeSetOp.c 8 Mar 2009 20:39:02 -0000
@@ -47,6 +47,7 @@
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
/*
@@ -196,6 +197,8 @@ ExecSetOp(SetOpState *node)
SetOp *plannode = (SetOp *) node->ps.plan;
TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
+ TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node);
+
/*
* If the previously-returned tuple needs to be returned more than once,
* keep returning it.
Index: src/backend/executor/nodeSort.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSort.c,v
retrieving revision 1.63
diff -u -3 -p -r1.63 nodeSort.c
--- src/backend/executor/nodeSort.c 1 Jan 2009 17:23:42 -0000 1.63
+++ src/backend/executor/nodeSort.c 8 Mar 2009 20:39:02 -0000
@@ -19,6 +19,7 @@
#include "executor/nodeSort.h"
#include "miscadmin.h"
#include "utils/tuplesort.h"
+#include "pg_trace.h"
/* ----------------------------------------------------------------
@@ -53,6 +54,8 @@ ExecSort(SortState *node)
dir = estate->es_direction;
tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
+ TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir);
+
/*
* If first time through, read all tuples from outer plan and pass them
to
* tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
Index: src/backend/executor/nodeSubplan.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v
retrieving revision 1.96
diff -u -3 -p -r1.96 nodeSubplan.c
--- src/backend/executor/nodeSubplan.c 1 Jan 2009 17:23:42 -0000 1.96
+++ src/backend/executor/nodeSubplan.c 8 Mar 2009 20:39:02 -0000
@@ -27,6 +27,7 @@
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
static Datum ExecSubPlan(SubPlanState *node,
@@ -92,6 +93,8 @@ ExecHashSubPlan(SubPlanState *node,
ExprContext *innerecontext = node->innerecontext;
TupleTableSlot *slot;
+ TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node);
+
/* Shouldn't have any direct correlation Vars */
if (subplan->parParam != NIL || node->args != NIL)
elog(ERROR, "hashed subplan with direct correlation not
supported");
@@ -227,6 +230,8 @@ ExecScanSubPlan(SubPlanState *node,
ListCell *l;
ArrayBuildState *astate = NULL;
+ TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node);
+
/*
* We are probably in a short-lived expression-evaluation context.
Switch
* to the per-query context for manipulating the child plan's chgParam,
Index: src/backend/executor/nodeUnique.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeUnique.c,v
retrieving revision 1.58
diff -u -3 -p -r1.58 nodeUnique.c
--- src/backend/executor/nodeUnique.c 1 Jan 2009 17:23:42 -0000 1.58
+++ src/backend/executor/nodeUnique.c 8 Mar 2009 20:39:02 -0000
@@ -36,6 +36,7 @@
#include "executor/executor.h"
#include "executor/nodeUnique.h"
#include "utils/memutils.h"
+#include "pg_trace.h"
/* ----------------------------------------------------------------
@@ -50,6 +51,8 @@ ExecUnique(UniqueState *node)
TupleTableSlot *slot;
PlanState *outerPlan;
+ TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node);
+
/*
* get information from the node
*/
Index: src/backend/utils/probes.d
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/probes.d,v
retrieving revision 1.6
diff -u -3 -p -r1.6 probes.d
--- src/backend/utils/probes.d 1 Jan 2009 17:23:48 -0000 1.6
+++ src/backend/utils/probes.d 8 Mar 2009 20:39:07 -0000
@@ -10,6 +10,7 @@
/* typedefs used in PostgreSQL */
#define LocalTransactionId unsigned int
+#define TransactionId unsigned int
#define LWLockId int
#define LWLockMode int
#define LOCKMODE int
@@ -91,4 +92,29 @@ provider postgresql {
probe xlog__switch();
probe wal__buffer__write__dirty__start();
probe wal__buffer__write__dirty__done();
+
+ probe slru__readpage__start(unsigned long, int, bool, TransactionId);
+ probe slru__readpage__done(int);
+ probe slru__readpage__readonly(unsigned long, int, TransactionId);
+ probe slru__writepage__start(unsigned long, int, int);
+ probe slru__writepage__done();
+ probe slru__readpage__physical__start(unsigned long, char *, int, int);
+ probe slru__readpage__physical__done(int, int, int);
+ probe slru__writepage__physical__start(unsigned long, int, int);
+ probe slru__writepage__physical__done(int, int, int);
+
+ probe executor__scan(unsigned long, unsigned int, unsigned long);
+ probe executor__agg(unsigned long, int);
+ probe executor__group(unsigned long, int);
+ probe executor__hash__multi(unsigned long);
+ probe executor__hashjoin(unsigned long);
+ probe executor__limit(unsigned long);
+ probe executor__material(unsigned long);
+ probe executor__mergejoin(unsigned long);
+ probe executor__nestloop(unsigned long);
+ probe executor__setop(unsigned long);
+ probe executor__sort(unsigned long, int);
+ probe executor__subplan__hash(unsigned long);
+ probe executor__subplan__scan(unsigned long);
+ probe executor__unique(unsigned long);
};
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers