Hi, While working on the foreign-join-pushdown issue, I noticed that in READ COMMITTED isolation level it's possible that the result of SELECT ... ORDER BY ... FOR UPDATE is not sorted correctly due to concurrent updates that replaced the sort key columns with new values as shown in the below example. That seems odd to me. So, I'd like to propose raising an error rather than returning a possibly-incorrect result for cases where the sorted tuples to be locked were modified by concurrent updates. Patch attached. Is it OK to add this to the current CF?
Create an environment: postgres=# create table test (a int); CREATE TABLE postgres=# insert into test values (1); INSERT 0 1 postgres=# insert into test values (2); INSERT 0 1 Run an example: [Terminal 1] postgres=# begin; BEGIN postgres=# update test set a = 3 where a = 1; UPDATE 1 [Terminal 2] postgres=# select * from test order by a for update; [Terminal 1] postgres=# commit; COMMIT [Terminal 2] (The following result will be shown after the commit in Terminal 1. Note that the output ordering is not correct.) a --- 3 2 (2 rows) Best regards, Etsuro Fujita
*** a/src/backend/executor/nodeLockRows.c --- b/src/backend/executor/nodeLockRows.c *************** *** 39,44 **** TupleTableSlot * /* return: a tuple or NULL */ --- 39,45 ---- ExecLockRows(LockRowsState *node) { TupleTableSlot *slot; + Plan *plan; EState *estate; PlanState *outerPlan; bool epq_needed; *************** *** 47,52 **** ExecLockRows(LockRowsState *node) --- 48,54 ---- /* * get information from the node */ + plan = node->ps.plan; estate = node->ps.state; outerPlan = outerPlanState(node); *************** *** 214,219 **** lnext: --- 216,225 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); + if (((LockRows *) plan)->ordering) + ereport(ERROR, + (errcode(ERRCODE_TRANSACTION_ROLLBACK), + errmsg("tuple to be locked was changed due to concurrent update"))); if (ItemPointerEquals(&hufd.ctid, &tuple.t_self)) { /* Tuple was deleted, so don't return it */ *** a/src/backend/nodes/copyfuncs.c --- b/src/backend/nodes/copyfuncs.c *************** *** 967,972 **** _copyLockRows(const LockRows *from) --- 967,973 ---- /* * copy remainder of node */ + COPY_SCALAR_FIELD(ordering); COPY_NODE_FIELD(rowMarks); COPY_SCALAR_FIELD(epqParam); *** a/src/backend/nodes/outfuncs.c --- b/src/backend/nodes/outfuncs.c *************** *** 842,847 **** _outLockRows(StringInfo str, const LockRows *node) --- 842,848 ---- _outPlanInfo(str, (const Plan *) node); + WRITE_BOOL_FIELD(ordering); WRITE_NODE_FIELD(rowMarks); WRITE_INT_FIELD(epqParam); } *** a/src/backend/optimizer/plan/createplan.c --- b/src/backend/optimizer/plan/createplan.c *************** *** 4801,4807 **** make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, * Build a LockRows plan node */ LockRows * ! make_lockrows(Plan *lefttree, List *rowMarks, int epqParam) { LockRows *node = makeNode(LockRows); Plan *plan = &node->plan; --- 4801,4807 ---- * Build a LockRows plan node */ LockRows * ! make_lockrows(Plan *lefttree, bool ordering, List *rowMarks, int epqParam) { LockRows *node = makeNode(LockRows); Plan *plan = &node->plan; *************** *** 4816,4821 **** make_lockrows(Plan *lefttree, List *rowMarks, int epqParam) --- 4816,4822 ---- plan->lefttree = lefttree; plan->righttree = NULL; + node->ordering = ordering; node->rowMarks = rowMarks; node->epqParam = epqParam; *** a/src/backend/optimizer/plan/planner.c --- b/src/backend/optimizer/plan/planner.c *************** *** 2302,2308 **** grouping_planner(PlannerInfo *root, double tuple_fraction) --- 2302,2311 ---- */ if (parse->rowMarks) { + bool ordering = parse->sortClause ? true : false; + result_plan = (Plan *) make_lockrows(result_plan, + ordering, root->rowMarks, SS_assign_special_param(root)); *** a/src/include/nodes/plannodes.h --- b/src/include/nodes/plannodes.h *************** *** 809,814 **** typedef struct SetOp --- 809,815 ---- typedef struct LockRows { Plan plan; + bool ordering; /* do we need to preserve the ordering? */ List *rowMarks; /* a list of PlanRowMark's */ int epqParam; /* ID of Param for EvalPlanQual re-eval */ } LockRows; *** a/src/include/optimizer/planmain.h --- b/src/include/optimizer/planmain.h *************** *** 74,80 **** extern Group *make_group(PlannerInfo *root, List *tlist, List *qual, Plan *lefttree); extern Plan *materialize_finished_plan(Plan *subplan); extern Unique *make_unique(Plan *lefttree, List *distinctList); ! extern LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, int64 offset_est, int64 count_est); extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, --- 74,80 ---- Plan *lefttree); extern Plan *materialize_finished_plan(Plan *subplan); extern Unique *make_unique(Plan *lefttree, List *distinctList); ! extern LockRows *make_lockrows(Plan *lefttree, bool ordering, List *rowMarks, int epqParam); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, int64 offset_est, int64 count_est); extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers