On Tue, Jan 24, 2023 at 03:57:56PM +0900, Michael Paquier wrote: > Makes sense. That would be my intention if 0004 is the most > acceptable and splitting things makes things a bit easier to review.
There was a silly mistake in 0004 where the jumbling code relied on compute_query_id rather than utility_query_id, so fixed and rebased as of v7 attached. -- Michael
From e1a35c02927ed38b04b1f628c460ef05f706621f Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Tue, 24 Jan 2023 15:29:06 +0900 Subject: [PATCH v7 3/4] Support for automated query jumble with all Nodes This applies query jumbling in a consistent way to all the Nodes, including DDLs & friends. --- src/include/nodes/bitmapset.h | 2 +- src/include/nodes/nodes.h | 13 +- src/include/nodes/parsenodes.h | 126 ++-- src/include/nodes/primnodes.h | 268 ++++---- src/backend/nodes/README | 1 + src/backend/nodes/gen_node_support.pl | 114 +++- src/backend/nodes/meson.build | 2 +- src/backend/nodes/queryjumblefuncs.c | 852 ++++++-------------------- 8 files changed, 519 insertions(+), 859 deletions(-) diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 0dca6bc5fa..3d2225e1ae 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -50,7 +50,7 @@ typedef int32 signedbitmapword; /* must be the matching signed type */ typedef struct Bitmapset { - pg_node_attr(custom_copy_equal, special_read_write) + pg_node_attr(custom_copy_equal, special_read_write, no_query_jumble) NodeTag type; int nwords; /* number of words in array */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 10752e8011..d7a9e38436 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -53,16 +53,20 @@ typedef enum NodeTag * - custom_read_write: Has custom implementations in outfuncs.c and * readfuncs.c. * + * - custom_query_jumble: Has custom implementation in queryjumblefuncs.c. + * * - no_copy: Does not support copyObject() at all. * * - no_equal: Does not support equal() at all. * * - no_copy_equal: Shorthand for both no_copy and no_equal. * + * - no_query_jumble: Does not support JumbleQuery() at all. + * * - no_read: Does not support nodeRead() at all. * - * - nodetag_only: Does not support copyObject(), equal(), outNode(), - * or nodeRead(). + * - nodetag_only: Does not support copyObject(), equal(), jumbleQuery() + * outNode() or nodeRead(). * * - special_read_write: Has special treatment in outNode() and nodeRead(). * @@ -97,6 +101,11 @@ typedef enum NodeTag * - equal_ignore_if_zero: Ignore the field for equality if it is zero. * (Otherwise, compare normally.) * + * - query_jumble_ignore: Ignore the field for the query jumbling. + * + * - query_jumble_location: Mark the field as a location to track. This is + * only allowed for integer fields that include "location" in their name. + * * - read_as(VALUE): In nodeRead(), replace the field's value with VALUE. * * - read_write_ignore: Ignore the field for read/write. This is only allowed diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 89335d95e7..f99fb5e909 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -116,6 +116,11 @@ typedef uint64 AclMode; /* a bitmask of privilege bits */ * * Planning converts a Query tree into a Plan tree headed by a PlannedStmt * node --- the Query structure is not used by the executor. + * + * All the fields ignored for the query jumbling are not semantically + * significant (such as alias names), as is ignored anything that can + * be deduced from child nodes (else we'd just be double-hashing that + * piece of information). */ typedef struct Query { @@ -124,45 +129,47 @@ typedef struct Query CmdType commandType; /* select|insert|update|delete|merge|utility */ /* where did I come from? */ - QuerySource querySource; + QuerySource querySource pg_node_attr(query_jumble_ignore); /* * query identifier (can be set by plugins); ignored for equal, as it - * might not be set; also not stored + * might not be set; also not stored. This is the result of the query + * jumble, hence ignored. */ - uint64 queryId pg_node_attr(equal_ignore, read_write_ignore, read_as(0)); + uint64 queryId pg_node_attr(equal_ignore, query_jumble_ignore, read_write_ignore, read_as(0)); /* do I set the command result tag? */ - bool canSetTag; + bool canSetTag pg_node_attr(query_jumble_ignore); Node *utilityStmt; /* non-null if commandType == CMD_UTILITY */ /* * rtable index of target relation for INSERT/UPDATE/DELETE/MERGE; 0 for - * SELECT. + * SELECT. This is ignored in the query jumble as unrelated to the + * compilation of the query ID. */ - int resultRelation; + int resultRelation pg_node_attr(query_jumble_ignore); /* has aggregates in tlist or havingQual */ - bool hasAggs; + bool hasAggs pg_node_attr(query_jumble_ignore); /* has window functions in tlist */ - bool hasWindowFuncs; + bool hasWindowFuncs pg_node_attr(query_jumble_ignore); /* has set-returning functions in tlist */ - bool hasTargetSRFs; + bool hasTargetSRFs pg_node_attr(query_jumble_ignore); /* has subquery SubLink */ - bool hasSubLinks; + bool hasSubLinks pg_node_attr(query_jumble_ignore); /* distinctClause is from DISTINCT ON */ - bool hasDistinctOn; + bool hasDistinctOn pg_node_attr(query_jumble_ignore); /* WITH RECURSIVE was specified */ - bool hasRecursive; + bool hasRecursive pg_node_attr(query_jumble_ignore); /* has INSERT/UPDATE/DELETE in WITH */ - bool hasModifyingCTE; + bool hasModifyingCTE pg_node_attr(query_jumble_ignore); /* FOR [KEY] UPDATE/SHARE was specified */ - bool hasForUpdate; + bool hasForUpdate pg_node_attr(query_jumble_ignore); /* rewriter has applied some RLS policy */ - bool hasRowSecurity; + bool hasRowSecurity pg_node_attr(query_jumble_ignore); /* is a RETURN statement */ - bool isReturn; + bool isReturn pg_node_attr(query_jumble_ignore); List *cteList; /* WITH list (of CommonTableExpr's) */ @@ -172,18 +179,18 @@ typedef struct Query * list of RTEPermissionInfo nodes for the rtable entries having * perminfoindex > 0 */ - List *rteperminfos; + List *rteperminfos pg_node_attr(query_jumble_ignore); FromExpr *jointree; /* table join tree (FROM and WHERE clauses); * also USING clause for MERGE */ List *mergeActionList; /* list of actions for MERGE (only) */ /* whether to use outer join */ - bool mergeUseOuterJoin; + bool mergeUseOuterJoin pg_node_attr(query_jumble_ignore); List *targetList; /* target list (of TargetEntry) */ /* OVERRIDING clause */ - OverridingKind override; + OverridingKind override pg_node_attr(query_jumble_ignore); OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */ @@ -215,10 +222,10 @@ typedef struct Query * A list of pg_constraint OIDs that the query depends on to be * semantically valid */ - List *constraintDeps; + List *constraintDeps pg_node_attr(query_jumble_ignore); /* a list of WithCheckOption's (added during rewrite) */ - List *withCheckOptions; + List *withCheckOptions pg_node_attr(query_jumble_ignore); /* * The following two fields identify the portion of the source text string @@ -229,7 +236,7 @@ typedef struct Query /* start location, or -1 if unknown */ int stmt_location; /* length in bytes; 0 means "rest of string" */ - int stmt_len; + int stmt_len pg_node_attr(query_jumble_ignore); } Query; @@ -1019,7 +1026,7 @@ typedef enum RTEKind typedef struct RangeTblEntry { - pg_node_attr(custom_read_write) + pg_node_attr(custom_read_write, custom_query_jumble) NodeTag type; @@ -1250,6 +1257,8 @@ typedef struct RTEPermissionInfo * time. We do however remember how many columns we thought the type had * (including dropped columns!), so that we can successfully ignore any * columns added after the query was parsed. + * + * The query jumbling needs only to track the function expression. */ typedef struct RangeTblFunction { @@ -1257,20 +1266,20 @@ typedef struct RangeTblFunction Node *funcexpr; /* expression tree for func call */ /* number of columns it contributes to RTE */ - int funccolcount; + int funccolcount pg_node_attr(query_jumble_ignore); /* These fields record the contents of a column definition list, if any: */ /* column names (list of String) */ - List *funccolnames; + List *funccolnames pg_node_attr(query_jumble_ignore); /* OID list of column type OIDs */ - List *funccoltypes; + List *funccoltypes pg_node_attr(query_jumble_ignore); /* integer list of column typmods */ - List *funccoltypmods; + List *funccoltypmods pg_node_attr(query_jumble_ignore); /* OID list of column collation OIDs */ - List *funccolcollations; + List *funccolcollations pg_node_attr(query_jumble_ignore); /* This is set during planning for use by the executor: */ /* PARAM_EXEC Param IDs affecting this func */ - Bitmapset *funcparams; + Bitmapset *funcparams pg_node_attr(query_jumble_ignore); } RangeTblFunction; /* @@ -1378,7 +1387,7 @@ typedef struct SortGroupClause Oid sortop; /* the ordering operator ('<' op), or 0 */ bool nulls_first; /* do NULLs come before normal values? */ /* can eqop be implemented by hashing? */ - bool hashable; + bool hashable pg_node_attr(query_jumble_ignore); } SortGroupClause; /* @@ -1443,7 +1452,7 @@ typedef enum GroupingSetKind typedef struct GroupingSet { NodeTag type; - GroupingSetKind kind; + GroupingSetKind kind pg_node_attr(query_jumble_ignore); List *content; int location; } GroupingSet; @@ -1464,35 +1473,38 @@ typedef struct GroupingSet * When refname isn't null, the partitionClause is always copied from there; * the orderClause might or might not be copied (see copiedOrder); the framing * options are never copied, per spec. + * + * The information relevant for the query jumbling is the partition clause + * type and its bounds. */ typedef struct WindowClause { NodeTag type; /* window name (NULL in an OVER clause) */ - char *name; + char *name pg_node_attr(query_jumble_ignore); /* referenced window name, if any */ - char *refname; + char *refname pg_node_attr(query_jumble_ignore); List *partitionClause; /* PARTITION BY list */ /* ORDER BY list */ - List *orderClause; + List *orderClause pg_node_attr(query_jumble_ignore); int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ /* qual to help short-circuit execution */ - List *runCondition; + List *runCondition pg_node_attr(query_jumble_ignore); /* in_range function for startOffset */ - Oid startInRangeFunc; + Oid startInRangeFunc pg_node_attr(query_jumble_ignore); /* in_range function for endOffset */ - Oid endInRangeFunc; + Oid endInRangeFunc pg_node_attr(query_jumble_ignore); /* collation for in_range tests */ - Oid inRangeColl; + Oid inRangeColl pg_node_attr(query_jumble_ignore); /* use ASC sort order for in_range tests? */ - bool inRangeAsc; + bool inRangeAsc pg_node_attr(query_jumble_ignore); /* nulls sort first for in_range tests? */ - bool inRangeNullsFirst; + bool inRangeNullsFirst pg_node_attr(query_jumble_ignore); Index winref; /* ID referenced by window functions */ /* did we copy orderClause from refname? */ - bool copiedOrder; + bool copiedOrder pg_node_attr(query_jumble_ignore); } WindowClause; /* @@ -1607,26 +1619,26 @@ typedef struct CommonTableExpr CTEMaterialize ctematerialized; /* is this an optimization fence? */ /* SelectStmt/InsertStmt/etc before parse analysis, Query afterwards: */ Node *ctequery; /* the CTE's subquery */ - CTESearchClause *search_clause; - CTECycleClause *cycle_clause; + CTESearchClause *search_clause pg_node_attr(query_jumble_ignore); + CTECycleClause *cycle_clause pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ /* These fields are set during parse analysis: */ /* is this CTE actually recursive? */ - bool cterecursive; + bool cterecursive pg_node_attr(query_jumble_ignore); /* * Number of RTEs referencing this CTE (excluding internal - * self-references) + * self-references), irrelevant for query jumbling. */ - int cterefcount; + int cterefcount pg_node_attr(query_jumble_ignore); /* list of output column names */ - List *ctecolnames; + List *ctecolnames pg_node_attr(query_jumble_ignore); /* OID list of output column type OIDs */ - List *ctecoltypes; + List *ctecoltypes pg_node_attr(query_jumble_ignore); /* integer list of output column typmods */ - List *ctecoltypmods; + List *ctecoltypmods pg_node_attr(query_jumble_ignore); /* OID list of column collation OIDs */ - List *ctecolcollations; + List *ctecolcollations pg_node_attr(query_jumble_ignore); } CommonTableExpr; /* Convenience macro to get the output tlist of a CTE's query */ @@ -1664,11 +1676,11 @@ typedef struct MergeAction bool matched; /* true=MATCHED, false=NOT MATCHED */ CmdType commandType; /* INSERT/UPDATE/DELETE/DO NOTHING */ /* OVERRIDING clause */ - OverridingKind override; + OverridingKind override pg_node_attr(query_jumble_ignore); Node *qual; /* transformed WHEN conditions */ List *targetList; /* the target list (of TargetEntry) */ /* target attribute numbers of an UPDATE */ - List *updateColnos; + List *updateColnos pg_node_attr(query_jumble_ignore); } MergeAction; /* @@ -1877,15 +1889,15 @@ typedef struct SetOperationStmt Node *rarg; /* right child */ /* Eventually add fields for CORRESPONDING spec here */ - /* Fields derived during parse analysis: */ + /* Fields derived during parse analysis (irrelevant for query jumbling): */ /* OID list of output column type OIDs */ - List *colTypes; + List *colTypes pg_node_attr(query_jumble_ignore); /* integer list of output column typmods */ - List *colTypmods; + List *colTypmods pg_node_attr(query_jumble_ignore); /* OID list of output column collation OIDs */ - List *colCollations; + List *colCollations pg_node_attr(query_jumble_ignore); /* a list of SortGroupClause's */ - List *groupClauses; + List *groupClauses pg_node_attr(query_jumble_ignore); /* groupClauses is NIL if UNION ALL, but must be set otherwise */ } SetOperationStmt; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index dec7d5c775..8d5b68a0bc 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -96,29 +96,29 @@ typedef struct TableFunc { NodeTag type; /* list of namespace URI expressions */ - List *ns_uris; + List *ns_uris pg_node_attr(query_jumble_ignore); /* list of namespace names or NULL */ - List *ns_names; + List *ns_names pg_node_attr(query_jumble_ignore); /* input document expression */ Node *docexpr; /* row filter expression */ Node *rowexpr; /* column names (list of String) */ - List *colnames; + List *colnames pg_node_attr(query_jumble_ignore); /* OID list of column type OIDs */ - List *coltypes; + List *coltypes pg_node_attr(query_jumble_ignore); /* integer list of column typmods */ - List *coltypmods; + List *coltypmods pg_node_attr(query_jumble_ignore); /* OID list of column collation OIDs */ - List *colcollations; + List *colcollations pg_node_attr(query_jumble_ignore); /* list of column filter expressions */ List *colexprs; /* list of column default expressions */ - List *coldefexprs; + List *coldefexprs pg_node_attr(query_jumble_ignore); /* nullability flag for each output column */ - Bitmapset *notnulls; + Bitmapset *notnulls pg_node_attr(query_jumble_ignore); /* counts from 0; -1 if none specified */ - int ordinalitycol; + int ordinalitycol pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } TableFunc; @@ -227,11 +227,11 @@ typedef struct Var AttrNumber varattno; /* pg_type OID for the type of this var */ - Oid vartype; + Oid vartype pg_node_attr(query_jumble_ignore); /* pg_attribute typmod value */ - int32 vartypmod; + int32 vartypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid varcollid; + Oid varcollid pg_node_attr(query_jumble_ignore); /* * for subquery variables referencing outer relations; 0 in a normal var, @@ -245,9 +245,9 @@ typedef struct Var * their varno/varattno match. */ /* syntactic relation index (0 if unknown) */ - Index varnosyn pg_node_attr(equal_ignore); + Index varnosyn pg_node_attr(equal_ignore, query_jumble_ignore); /* syntactic attribute number */ - AttrNumber varattnosyn pg_node_attr(equal_ignore); + AttrNumber varattnosyn pg_node_attr(equal_ignore, query_jumble_ignore); /* token location, or -1 if unknown */ int location; @@ -260,6 +260,8 @@ typedef struct Var * must be in non-extended form (4-byte header, no compression or external * references). This ensures that the Const node is self-contained and makes * it more likely that equal() will see logically identical values as equal. + * + * Only the constant type OID is relevant for the query jumbling. */ typedef struct Const { @@ -269,24 +271,27 @@ typedef struct Const /* pg_type OID of the constant's datatype */ Oid consttype; /* typmod value, if any */ - int32 consttypmod; + int32 consttypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid constcollid; + Oid constcollid pg_node_attr(query_jumble_ignore); /* typlen of the constant's datatype */ - int constlen; + int constlen pg_node_attr(query_jumble_ignore); /* the constant's value */ - Datum constvalue; + Datum constvalue pg_node_attr(query_jumble_ignore); /* whether the constant is null (if true, constvalue is undefined) */ - bool constisnull; + bool constisnull pg_node_attr(query_jumble_ignore); /* * Whether this datatype is passed by value. If true, then all the * information is stored in the Datum. If false, then the Datum contains * a pointer to the information. */ - bool constbyval; - /* token location, or -1 if unknown */ - int location; + bool constbyval pg_node_attr(query_jumble_ignore); + /* + * token location, or -1 if unknown. All constants are tracked as + * locations in query jumbling, to be marked as parameters. + */ + int location pg_node_attr(query_jumble_location); } Const; /* @@ -324,6 +329,7 @@ typedef enum ParamKind PARAM_MULTIEXPR } ParamKind; +/* typmod and collation information are irrelevant for the query jumbling. */ typedef struct Param { Expr xpr; @@ -331,9 +337,9 @@ typedef struct Param int paramid; /* numeric ID for parameter */ Oid paramtype; /* pg_type OID of parameter's datatype */ /* typmod value, if known */ - int32 paramtypmod; + int32 paramtypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid paramcollid; + Oid paramcollid pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } Param; @@ -386,6 +392,9 @@ typedef struct Param * and can share the result. Aggregates with same 'transno' but different * 'aggno' can share the same transition state, only the final function needs * to be called separately. + * + * Information related to collations, transition types and internal states + * are irrelevant for the query jumbling. */ typedef struct Aggref { @@ -395,22 +404,22 @@ typedef struct Aggref Oid aggfnoid; /* type Oid of result of the aggregate */ - Oid aggtype; + Oid aggtype pg_node_attr(query_jumble_ignore); /* OID of collation of result */ - Oid aggcollid; + Oid aggcollid pg_node_attr(query_jumble_ignore); /* OID of collation that function should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* * type Oid of aggregate's transition value; ignored for equal since it * might not be set yet */ - Oid aggtranstype pg_node_attr(equal_ignore); + Oid aggtranstype pg_node_attr(equal_ignore, query_jumble_ignore); /* type Oids of direct and aggregated args */ - List *aggargtypes; + List *aggargtypes pg_node_attr(query_jumble_ignore); /* direct arguments, if an ordered-set agg */ List *aggdirectargs; @@ -428,31 +437,31 @@ typedef struct Aggref Expr *aggfilter; /* true if argument list was really '*' */ - bool aggstar; + bool aggstar pg_node_attr(query_jumble_ignore); /* * true if variadic arguments have been combined into an array last * argument */ - bool aggvariadic; + bool aggvariadic pg_node_attr(query_jumble_ignore); /* aggregate kind (see pg_aggregate.h) */ - char aggkind; + char aggkind pg_node_attr(query_jumble_ignore); /* aggregate input already sorted */ - bool aggpresorted pg_node_attr(equal_ignore); + bool aggpresorted pg_node_attr(equal_ignore, query_jumble_ignore); /* > 0 if agg belongs to outer query */ - Index agglevelsup; + Index agglevelsup pg_node_attr(query_jumble_ignore); /* expected agg-splitting mode of parent Agg */ - AggSplit aggsplit; + AggSplit aggsplit pg_node_attr(query_jumble_ignore); /* unique ID within the Agg node */ - int aggno; + int aggno pg_node_attr(query_jumble_ignore); /* unique ID of transition state in the Agg */ - int aggtransno; + int aggtransno pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; @@ -481,19 +490,22 @@ typedef struct Aggref * * In raw parse output we have only the args list; parse analysis fills in the * refs list, and the planner fills in the cols list. + * + * All the fields used as information for an internal state are irrelevant + * for the query jumbling. */ typedef struct GroupingFunc { Expr xpr; /* arguments, not evaluated but kept for benefit of EXPLAIN etc. */ - List *args; + List *args pg_node_attr(query_jumble_ignore); /* ressortgrouprefs of arguments */ List *refs pg_node_attr(equal_ignore); /* actual column positions set by planner */ - List *cols pg_node_attr(equal_ignore); + List *cols pg_node_attr(equal_ignore, query_jumble_ignore); /* same as Aggref.agglevelsup */ Index agglevelsup; @@ -504,6 +516,9 @@ typedef struct GroupingFunc /* * WindowFunc + * + * Collation information is irrelevant for the query jumbling, as is the + * internal state information of the node like "winstar" and "winagg". */ typedef struct WindowFunc { @@ -511,11 +526,11 @@ typedef struct WindowFunc /* pg_proc Oid of the function */ Oid winfnoid; /* type Oid of result of the window function */ - Oid wintype; + Oid wintype pg_node_attr(query_jumble_ignore); /* OID of collation of result */ - Oid wincollid; + Oid wincollid pg_node_attr(query_jumble_ignore); /* OID of collation that function should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* arguments to the window function */ List *args; /* FILTER expression, if any */ @@ -523,9 +538,9 @@ typedef struct WindowFunc /* index of associated WindowClause */ Index winref; /* true if argument list was really '*' */ - bool winstar; + bool winstar pg_node_attr(query_jumble_ignore); /* is function a simple aggregate? */ - bool winagg; + bool winagg pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } WindowFunc; @@ -564,6 +579,8 @@ typedef struct WindowFunc * subscripting logic. Likewise, reftypmod and refcollid will match the * container's properties in a store, but could be different in a fetch. * + * Any internal state data is ignored for the query jumbling. + * * Note: for the cases where a container is returned, if refexpr yields a R/W * expanded container, then the implementation is allowed to modify that * object in-place and return the same object. @@ -572,15 +589,15 @@ typedef struct SubscriptingRef { Expr xpr; /* type of the container proper */ - Oid refcontainertype; + Oid refcontainertype pg_node_attr(query_jumble_ignore); /* the container type's pg_type.typelem */ - Oid refelemtype; + Oid refelemtype pg_node_attr(query_jumble_ignore); /* type of the SubscriptingRef's result */ - Oid refrestype; + Oid refrestype pg_node_attr(query_jumble_ignore); /* typmod of the result */ - int32 reftypmod; + int32 reftypmod pg_node_attr(query_jumble_ignore); /* collation of result, or InvalidOid if none */ - Oid refcollid; + Oid refcollid pg_node_attr(query_jumble_ignore); /* expressions that evaluate to upper container indexes */ List *refupperindexpr; @@ -631,6 +648,9 @@ typedef enum CoercionForm /* * FuncExpr - expression node for a function call + * + * Collation information is irrelevant for the query jumbling, only the + * arguments and the function OID matter. */ typedef struct FuncExpr { @@ -638,21 +658,21 @@ typedef struct FuncExpr /* PG_PROC OID of the function */ Oid funcid; /* PG_TYPE OID of result value */ - Oid funcresulttype; + Oid funcresulttype pg_node_attr(query_jumble_ignore); /* true if function returns set */ - bool funcretset; + bool funcretset pg_node_attr(query_jumble_ignore); /* * true if variadic arguments have been combined into an array last * argument */ - bool funcvariadic; + bool funcvariadic pg_node_attr(query_jumble_ignore); /* how to display this function call */ - CoercionForm funcformat; + CoercionForm funcformat pg_node_attr(query_jumble_ignore); /* OID of collation of result */ - Oid funccollid; + Oid funccollid pg_node_attr(query_jumble_ignore); /* OID of collation that function should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* arguments to the function */ List *args; /* token location, or -1 if unknown */ @@ -679,7 +699,7 @@ typedef struct NamedArgExpr /* the argument expression */ Expr *arg; /* the name */ - char *name; + char *name pg_node_attr(query_jumble_ignore); /* argument's number in positional notation */ int argnumber; /* argument name location, or -1 if unknown */ @@ -695,6 +715,9 @@ typedef struct NamedArgExpr * of the node. The planner makes sure it is valid before passing the node * tree to the executor, but during parsing/planning opfuncid can be 0. * Therefore, equal() will accept a zero value as being equal to other values. + * + * Internal state information and collation data is irrelevant for the query + * jumbling. */ typedef struct OpExpr { @@ -704,19 +727,19 @@ typedef struct OpExpr Oid opno; /* PG_PROC OID of underlying function */ - Oid opfuncid pg_node_attr(equal_ignore_if_zero); + Oid opfuncid pg_node_attr(equal_ignore_if_zero, query_jumble_ignore); /* PG_TYPE OID of result value */ - Oid opresulttype; + Oid opresulttype pg_node_attr(query_jumble_ignore); /* true if operator returns set */ - bool opretset; + bool opretset pg_node_attr(query_jumble_ignore); /* OID of collation of result */ - Oid opcollid; + Oid opcollid pg_node_attr(query_jumble_ignore); /* OID of collation that operator should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* arguments to the operator (1 or 2) */ List *args; @@ -772,6 +795,9 @@ typedef OpExpr NullIfExpr; * Similar to OpExpr, opfuncid, hashfuncid, and negfuncid are not necessarily * filled in right away, so will be ignored for equality if they are not set * yet. + * + * OID entries of the internal function types are irrelevant for the query + * jumbling, but the operator OID and the arguments are. */ typedef struct ScalarArrayOpExpr { @@ -781,19 +807,19 @@ typedef struct ScalarArrayOpExpr Oid opno; /* PG_PROC OID of comparison function */ - Oid opfuncid pg_node_attr(equal_ignore_if_zero); + Oid opfuncid pg_node_attr(equal_ignore_if_zero, query_jumble_ignore); /* PG_PROC OID of hash func or InvalidOid */ - Oid hashfuncid pg_node_attr(equal_ignore_if_zero); + Oid hashfuncid pg_node_attr(equal_ignore_if_zero, query_jumble_ignore); /* PG_PROC OID of negator of opfuncid function or InvalidOid. See above */ - Oid negfuncid pg_node_attr(equal_ignore_if_zero); + Oid negfuncid pg_node_attr(equal_ignore_if_zero, query_jumble_ignore); /* true for ANY, false for ALL */ bool useOr; /* OID of collation that operator should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* the scalar and array operands */ List *args; @@ -895,7 +921,7 @@ typedef struct SubLink int subLinkId; /* ID (1..n); 0 if not MULTIEXPR */ Node *testexpr; /* outer-query test for ALL/ANY/ROWCOMPARE */ /* originally specified operator name */ - List *operName; + List *operName pg_node_attr(query_jumble_ignore); /* subselect as Query* or raw parsetree */ Node *subselect; int location; /* token location, or -1 if unknown */ @@ -1007,11 +1033,11 @@ typedef struct FieldSelect Expr *arg; /* input expression */ AttrNumber fieldnum; /* attribute number of field to extract */ /* type of the field (result type of this node) */ - Oid resulttype; + Oid resulttype pg_node_attr(query_jumble_ignore); /* output typmod (usually -1) */ - int32 resulttypmod; + int32 resulttypmod pg_node_attr(query_jumble_ignore); /* OID of collation of the field */ - Oid resultcollid; + Oid resultcollid pg_node_attr(query_jumble_ignore); } FieldSelect; /* ---------------- @@ -1038,9 +1064,9 @@ typedef struct FieldStore Expr *arg; /* input tuple value */ List *newvals; /* new value(s) for field(s) */ /* integer list of field attnums */ - List *fieldnums; + List *fieldnums pg_node_attr(query_jumble_ignore); /* type of result (same as type of arg) */ - Oid resulttype; + Oid resulttype pg_node_attr(query_jumble_ignore); /* Like RowExpr, we deliberately omit a typmod and collation here */ } FieldStore; @@ -1063,11 +1089,11 @@ typedef struct RelabelType Expr *arg; /* input expression */ Oid resulttype; /* output type of coercion expression */ /* output typmod (usually -1) */ - int32 resulttypmod; + int32 resulttypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid resultcollid; + Oid resultcollid pg_node_attr(query_jumble_ignore); /* how to display this node */ - CoercionForm relabelformat; + CoercionForm relabelformat pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } RelabelType; @@ -1087,9 +1113,9 @@ typedef struct CoerceViaIO Oid resulttype; /* output type of coercion */ /* output typmod is not stored, but is presumed -1 */ /* OID of collation, or InvalidOid if none */ - Oid resultcollid; + Oid resultcollid pg_node_attr(query_jumble_ignore); /* how to display this node */ - CoercionForm coerceformat; + CoercionForm coerceformat pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } CoerceViaIO; @@ -1113,11 +1139,11 @@ typedef struct ArrayCoerceExpr Expr *elemexpr; /* expression representing per-element work */ Oid resulttype; /* output type of coercion (an array type) */ /* output typmod (also element typmod) */ - int32 resulttypmod; + int32 resulttypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid resultcollid; + Oid resultcollid pg_node_attr(query_jumble_ignore); /* how to display this node */ - CoercionForm coerceformat; + CoercionForm coerceformat pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } ArrayCoerceExpr; @@ -1141,7 +1167,7 @@ typedef struct ConvertRowtypeExpr Oid resulttype; /* output type (always a composite type) */ /* Like RowExpr, we deliberately omit a typmod and collation here */ /* how to display this node */ - CoercionForm convertformat; + CoercionForm convertformat pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } ConvertRowtypeExpr; @@ -1186,9 +1212,9 @@ typedef struct CaseExpr { Expr xpr; /* type of expression result */ - Oid casetype; + Oid casetype pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid casecollid; + Oid casecollid pg_node_attr(query_jumble_ignore); Expr *arg; /* implicit equality comparison argument */ List *args; /* the arguments (list of WHEN clauses) */ Expr *defresult; /* the default result (ELSE clause) */ @@ -1231,9 +1257,9 @@ typedef struct CaseTestExpr Expr xpr; Oid typeId; /* type for substituted value */ /* typemod for substituted value */ - int32 typeMod; + int32 typeMod pg_node_attr(query_jumble_ignore); /* collation for the substituted value */ - Oid collation; + Oid collation pg_node_attr(query_jumble_ignore); } CaseTestExpr; /* @@ -1248,15 +1274,15 @@ typedef struct ArrayExpr { Expr xpr; /* type of expression result */ - Oid array_typeid; + Oid array_typeid pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid array_collid; + Oid array_collid pg_node_attr(query_jumble_ignore); /* common type of array elements */ - Oid element_typeid; + Oid element_typeid pg_node_attr(query_jumble_ignore); /* the array elements or sub-arrays */ List *elements; /* true if elements are sub-arrays */ - bool multidims; + bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } ArrayExpr; @@ -1288,7 +1314,7 @@ typedef struct RowExpr List *args; /* the fields */ /* RECORDOID or a composite type's ID */ - Oid row_typeid; + Oid row_typeid pg_node_attr(query_jumble_ignore); /* * row_typeid cannot be a domain over composite, only plain composite. To @@ -1304,10 +1330,10 @@ typedef struct RowExpr */ /* how to display this node */ - CoercionForm row_format; + CoercionForm row_format pg_node_attr(query_jumble_ignore); /* list of String, or NIL */ - List *colnames; + List *colnames pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } RowExpr; @@ -1344,11 +1370,11 @@ typedef struct RowCompareExpr /* LT LE GE or GT, never EQ or NE */ RowCompareType rctype; /* OID list of pairwise comparison ops */ - List *opnos; + List *opnos pg_node_attr(query_jumble_ignore); /* OID list of containing operator families */ - List *opfamilies; + List *opfamilies pg_node_attr(query_jumble_ignore); /* OID list of collations for comparisons */ - List *inputcollids; + List *inputcollids pg_node_attr(query_jumble_ignore); /* the left-hand input arguments */ List *largs; /* the right-hand input arguments */ @@ -1362,9 +1388,9 @@ typedef struct CoalesceExpr { Expr xpr; /* type of expression result */ - Oid coalescetype; + Oid coalescetype pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid coalescecollid; + Oid coalescecollid pg_node_attr(query_jumble_ignore); /* the arguments */ List *args; /* token location, or -1 if unknown */ @@ -1384,11 +1410,11 @@ typedef struct MinMaxExpr { Expr xpr; /* common type of arguments and result */ - Oid minmaxtype; + Oid minmaxtype pg_node_attr(query_jumble_ignore); /* OID of collation of result */ - Oid minmaxcollid; + Oid minmaxcollid pg_node_attr(query_jumble_ignore); /* OID of collation that function should use */ - Oid inputcollid; + Oid inputcollid pg_node_attr(query_jumble_ignore); /* function to execute */ MinMaxOp op; /* the arguments */ @@ -1432,18 +1458,18 @@ typedef struct XmlExpr /* xml function ID */ XmlExprOp op; /* name in xml(NAME foo ...) syntaxes */ - char *name; + char *name pg_node_attr(query_jumble_ignore); /* non-XML expressions for xml_attributes */ List *named_args; /* parallel list of String values */ - List *arg_names; + List *arg_names pg_node_attr(query_jumble_ignore); /* list of expressions */ List *args; /* DOCUMENT or CONTENT */ - XmlOptionType xmloption; + XmlOptionType xmloption pg_node_attr(query_jumble_ignore); /* target type/typmod for XMLSERIALIZE */ - Oid type; - int32 typmod; + Oid type pg_node_attr(query_jumble_ignore); + int32 typmod pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } XmlExpr; @@ -1478,7 +1504,7 @@ typedef struct NullTest Expr *arg; /* input expression */ NullTestType nulltesttype; /* IS NULL, IS NOT NULL */ /* T to perform field-by-field null checks */ - bool argisrow; + bool argisrow pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } NullTest; @@ -1512,6 +1538,8 @@ typedef struct BooleanTest * checked will be determined. If the value passes, it is returned as the * result; if not, an error is raised. Note that this is equivalent to * RelabelType in the scenario where no constraints are applied. + * + * typemod and collation are irrelevant for the query jumbling. */ typedef struct CoerceToDomain { @@ -1519,11 +1547,11 @@ typedef struct CoerceToDomain Expr *arg; /* input expression */ Oid resulttype; /* domain type ID (result type) */ /* output typmod (currently always -1) */ - int32 resulttypmod; + int32 resulttypmod pg_node_attr(query_jumble_ignore); /* OID of collation, or InvalidOid if none */ - Oid resultcollid; + Oid resultcollid pg_node_attr(query_jumble_ignore); /* how to display this node */ - CoercionForm coercionformat; + CoercionForm coercionformat pg_node_attr(query_jumble_ignore); int location; /* token location, or -1 if unknown */ } CoerceToDomain; @@ -1542,9 +1570,9 @@ typedef struct CoerceToDomainValue /* type for substituted value */ Oid typeId; /* typemod for substituted value */ - int32 typeMod; + int32 typeMod pg_node_attr(query_jumble_ignore); /* collation for the substituted value */ - Oid collation; + Oid collation pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } CoerceToDomainValue; @@ -1555,6 +1583,8 @@ typedef struct CoerceToDomainValue * This is not an executable expression: it must be replaced by the actual * column default expression during rewriting. But it is convenient to * treat it as an expression node during parsing and rewriting. + * + * typemod and collation are irrelevant for the query jumbling. */ typedef struct SetToDefault { @@ -1562,9 +1592,9 @@ typedef struct SetToDefault /* type for substituted value */ Oid typeId; /* typemod for substituted value */ - int32 typeMod; + int32 typeMod pg_node_attr(query_jumble_ignore); /* collation for the substituted value */ - Oid collation; + Oid collation pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ int location; } SetToDefault; @@ -1682,15 +1712,15 @@ typedef struct TargetEntry /* attribute number (see notes above) */ AttrNumber resno; /* name of the column (could be NULL) */ - char *resname; + char *resname pg_node_attr(query_jumble_ignore); /* nonzero if referenced by a sort/group clause */ Index ressortgroupref; /* OID of column's source table */ - Oid resorigtbl; + Oid resorigtbl pg_node_attr(query_jumble_ignore); /* column's number in source table */ - AttrNumber resorigcol; + AttrNumber resorigcol pg_node_attr(query_jumble_ignore); /* set to true to eliminate the attribute from final target list */ - bool resjunk; + bool resjunk pg_node_attr(query_jumble_ignore); } TargetEntry; @@ -1773,13 +1803,13 @@ typedef struct JoinExpr Node *larg; /* left subtree */ Node *rarg; /* right subtree */ /* USING clause, if any (list of String) */ - List *usingClause; + List *usingClause pg_node_attr(query_jumble_ignore); /* alias attached to USING clause, if any */ - Alias *join_using_alias; + Alias *join_using_alias pg_node_attr(query_jumble_ignore); /* qualifiers on join, if any */ Node *quals; /* user-written alias clause, if any */ - Alias *alias; + Alias *alias pg_node_attr(query_jumble_ignore); /* RT index assigned for join, or 0 */ int rtindex; } JoinExpr; diff --git a/src/backend/nodes/README b/src/backend/nodes/README index 489a67eb89..7cf6e3b041 100644 --- a/src/backend/nodes/README +++ b/src/backend/nodes/README @@ -51,6 +51,7 @@ FILES IN THIS DIRECTORY (src/backend/nodes/) readfuncs.c - convert text representation back to a node tree (*) makefuncs.c - creator functions for some common node types nodeFuncs.c - some other general-purpose manipulation functions + queryjumblefuncs.c - compute a node tree for query jumbling (*) (*) - Most functions in these files are generated by gen_node_support.pl and #include'd there. diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index b3c1ead496..471333dc80 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -121,6 +121,8 @@ my %node_type_info; my @no_copy; # node types we don't want equal support for my @no_equal; +# node types we don't want jumble support for +my @no_query_jumble; # node types we don't want read support for my @no_read; # node types we don't want read/write support for @@ -155,12 +157,13 @@ my @extra_tags = qw( # This is a regular node, but we skip parsing it from its header file # since we won't use its internal structure here anyway. push @node_types, qw(List); -# Lists are specially treated in all four support files, too. +# Lists are specially treated in all five support files, too. # (Ideally we'd mark List as "special copy/equal" not "no copy/equal". # But until there's other use-cases for that, just hot-wire the tests # that would need to distinguish.) push @no_copy, qw(List); push @no_equal, qw(List); +push @no_query_jumble, qw(List); push @special_read_write, qw(List); # Nodes with custom copy/equal implementations are skipped from @@ -170,6 +173,9 @@ my @custom_copy_equal; # Similarly for custom read/write implementations. my @custom_read_write; +# Similarly for custom query jumble implementation. +my @custom_query_jumble; + # Track node types with manually assigned NodeTag numbers. my %manual_nodetag_number; @@ -319,6 +325,10 @@ foreach my $infile (@ARGV) { push @custom_read_write, $in_struct; } + elsif ($attr eq 'custom_query_jumble') + { + push @custom_query_jumble, $in_struct; + } elsif ($attr eq 'no_copy') { push @no_copy, $in_struct; @@ -332,6 +342,10 @@ foreach my $infile (@ARGV) push @no_copy, $in_struct; push @no_equal, $in_struct; } + elsif ($attr eq 'no_query_jumble') + { + push @no_query_jumble, $in_struct; + } elsif ($attr eq 'no_read') { push @no_read, $in_struct; @@ -457,6 +471,8 @@ foreach my $infile (@ARGV) equal_as_scalar equal_ignore equal_ignore_if_zero + query_jumble_ignore + query_jumble_location read_write_ignore write_only_relids write_only_nondefault_pathtarget @@ -1225,6 +1241,102 @@ close $ofs; close $rfs; +# queryjumblefuncs.c + +push @output_files, 'queryjumblefuncs.funcs.c'; +open my $jff, '>', "$output_path/queryjumblefuncs.funcs.c$tmpext" or die $!; +push @output_files, 'queryjumblefuncs.switch.c'; +open my $jfs, '>', "$output_path/queryjumblefuncs.switch.c$tmpext" or die $!; + +printf $jff $header_comment, 'queryjumblefuncs.funcs.c'; +printf $jfs $header_comment, 'queryjumblefuncs.switch.c'; + +print $jff $node_includes; + +foreach my $n (@node_types) +{ + next if elem $n, @abstract_types; + next if elem $n, @nodetag_only; + my $struct_no_query_jumble = (elem $n, @no_query_jumble); + + print $jfs "\t\t\tcase T_${n}:\n" + . "\t\t\t\t_jumble${n}(jstate, expr);\n" + . "\t\t\t\tbreak;\n" + unless $struct_no_query_jumble; + + next if elem $n, @custom_query_jumble; + + print $jff " +static void +_jumble${n}(JumbleState *jstate, Node *node) +{ +\t${n} *expr = (${n} *) node;\n +" unless $struct_no_query_jumble; + + # print instructions for each field + foreach my $f (@{ $node_type_info{$n}->{fields} }) + { + my $t = $node_type_info{$n}->{field_types}{$f}; + my @a = @{ $node_type_info{$n}->{field_attrs}{$f} }; + my $query_jumble_ignore = $struct_no_query_jumble; + my $query_jumble_location = 0; + + # extract per-field attributes + foreach my $a (@a) + { + if ($a eq 'query_jumble_ignore') + { + $query_jumble_ignore = 1; + } + elsif ($a eq 'query_jumble_location') + { + $query_jumble_location = 1; + } + } + + # node type + if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/) + and elem $1, @node_types) + { + print $jff "\tJUMBLE_NODE($f);\n" + unless $query_jumble_ignore; + } + elsif ($t eq 'int' && $f =~ 'location$') + { + # Track the node's location only if directly requested. + if ($query_jumble_location) + { + print $jff "\tJUMBLE_LOCATION($f);\n" + unless $query_jumble_ignore; + } + } + elsif ($t eq 'char*') + { + print $jff "\tJUMBLE_STRING($f);\n" + unless $query_jumble_ignore; + } + else + { + print $jff "\tJUMBLE_FIELD($f);\n" + unless $query_jumble_ignore; + } + } + + # Some nodes have no attributes like CheckPointStmt, + # so tweak things for empty contents. + if (scalar(@{ $node_type_info{$n}->{fields} }) == 0) + { + print $jff "\t(void) expr;\n" + unless $struct_no_query_jumble; + } + + print $jff "} +" unless $struct_no_query_jumble; +} + +close $jff; +close $jfs; + # now rename the temporary files to their final names foreach my $file (@output_files) { diff --git a/src/backend/nodes/meson.build b/src/backend/nodes/meson.build index 9230515e7f..31467a12d3 100644 --- a/src/backend/nodes/meson.build +++ b/src/backend/nodes/meson.build @@ -10,7 +10,6 @@ backend_sources += files( 'nodes.c', 'params.c', 'print.c', - 'queryjumblefuncs.c', 'read.c', 'tidbitmap.c', 'value.c', @@ -21,6 +20,7 @@ backend_sources += files( nodefunc_sources = files( 'copyfuncs.c', 'equalfuncs.c', + 'queryjumblefuncs.c', 'outfuncs.c', 'readfuncs.c', ) diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 16084842a3..16fdf7164a 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -45,15 +45,12 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO; /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; -static uint64 compute_utility_query_id(const char *query_text, - int query_location, int query_len); static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); -static void JumbleQueryInternal(JumbleState *jstate, Query *query); -static void JumbleRangeTable(JumbleState *jstate, List *rtable); -static void JumbleRowMarks(JumbleState *jstate, List *rowMarks); -static void JumbleExpr(JumbleState *jstate, Node *node); static void RecordConstLocation(JumbleState *jstate, int location); +static void _jumbleNode(JumbleState *jstate, Node *node); +static void _jumbleList(JumbleState *jstate, Node *node); +static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node); /* * Given a possibly multi-statement source string, confine our attention to the @@ -105,38 +102,29 @@ JumbleQuery(Query *query, const char *querytext) Assert(IsQueryIdEnabled()); - if (query->utilityStmt) - { - query->queryId = compute_utility_query_id(querytext, - query->stmt_location, - query->stmt_len); - } - else - { - jstate = (JumbleState *) palloc(sizeof(JumbleState)); + jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; - jstate->clocations_buf_size = 32; - jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); - jstate->clocations_count = 0; - jstate->highest_extern_param_id = 0; + /* Set up workspace for query jumbling */ + jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate->jumble_len = 0; + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + jstate->clocations_count = 0; + jstate->highest_extern_param_id = 0; - /* Compute query ID and mark the Query node with it */ - JumbleQueryInternal(jstate, query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + /* Compute query ID and mark the Query node with it */ + _jumbleNode(jstate, (Node *) query); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == UINT64CONST(0)) - query->queryId = UINT64CONST(1); - } + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case. + */ + if (query->queryId == UINT64CONST(0)) + query->queryId = UINT64CONST(1); return jstate; } @@ -154,34 +142,6 @@ EnableQueryId(void) query_id_enabled = true; } -/* - * Compute a query identifier for the given utility query string. - */ -static uint64 -compute_utility_query_id(const char *query_text, int query_location, int query_len) -{ - uint64 queryId; - const char *sql; - - /* - * Confine our attention to the relevant part of the string, if the query - * is a portion of a multi-statement source string. - */ - sql = CleanQuerytext(query_text, &query_location, &query_len); - - queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql, - query_len, 0)); - - /* - * If we are unlucky enough to get a hash of zero(invalid), use queryID as - * 2 instead, queryID 1 is already in use for normal statements. - */ - if (queryId == UINT64CONST(0)) - queryId = UINT64CONST(2); - - return queryId; -} - /* * AppendJumble: Append a value that is substantive in a given query to * the current jumble. @@ -219,621 +179,6 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) jstate->jumble_len = jumble_len; } -/* - * Wrappers around AppendJumble to encapsulate details of serialization - * of individual local variable elements. - */ -#define APP_JUMB(item) \ - AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) -#define APP_JUMB_STRING(str) \ - AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1) - -/* - * JumbleQueryInternal: Selectively serialize the query tree, appending - * significant data to the "query jumble" while ignoring nonsignificant data. - * - * Rule of thumb for what to include is that we should ignore anything not - * semantically significant (such as alias names) as well as anything that can - * be deduced from child nodes (else we'd just be double-hashing that piece - * of information). - */ -static void -JumbleQueryInternal(JumbleState *jstate, Query *query) -{ - Assert(IsA(query, Query)); - Assert(query->utilityStmt == NULL); - - APP_JUMB(query->commandType); - /* resultRelation is usually predictable from commandType */ - JumbleExpr(jstate, (Node *) query->cteList); - JumbleRangeTable(jstate, query->rtable); - JumbleExpr(jstate, (Node *) query->jointree); - JumbleExpr(jstate, (Node *) query->mergeActionList); - JumbleExpr(jstate, (Node *) query->targetList); - JumbleExpr(jstate, (Node *) query->onConflict); - JumbleExpr(jstate, (Node *) query->returningList); - JumbleExpr(jstate, (Node *) query->groupClause); - APP_JUMB(query->groupDistinct); - JumbleExpr(jstate, (Node *) query->groupingSets); - JumbleExpr(jstate, query->havingQual); - JumbleExpr(jstate, (Node *) query->windowClause); - JumbleExpr(jstate, (Node *) query->distinctClause); - JumbleExpr(jstate, (Node *) query->sortClause); - JumbleExpr(jstate, query->limitOffset); - JumbleExpr(jstate, query->limitCount); - APP_JUMB(query->limitOption); - JumbleRowMarks(jstate, query->rowMarks); - JumbleExpr(jstate, query->setOperations); -} - -/* - * Jumble a range table - */ -static void -JumbleRangeTable(JumbleState *jstate, List *rtable) -{ - ListCell *lc; - - foreach(lc, rtable) - { - RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc); - - APP_JUMB(rte->rtekind); - switch (rte->rtekind) - { - case RTE_RELATION: - APP_JUMB(rte->relid); - JumbleExpr(jstate, (Node *) rte->tablesample); - APP_JUMB(rte->inh); - break; - case RTE_SUBQUERY: - JumbleQueryInternal(jstate, rte->subquery); - break; - case RTE_JOIN: - APP_JUMB(rte->jointype); - break; - case RTE_FUNCTION: - JumbleExpr(jstate, (Node *) rte->functions); - break; - case RTE_TABLEFUNC: - JumbleExpr(jstate, (Node *) rte->tablefunc); - break; - case RTE_VALUES: - JumbleExpr(jstate, (Node *) rte->values_lists); - break; - case RTE_CTE: - - /* - * Depending on the CTE name here isn't ideal, but it's the - * only info we have to identify the referenced WITH item. - */ - APP_JUMB_STRING(rte->ctename); - APP_JUMB(rte->ctelevelsup); - break; - case RTE_NAMEDTUPLESTORE: - APP_JUMB_STRING(rte->enrname); - break; - case RTE_RESULT: - break; - default: - elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind); - break; - } - } -} - -/* - * Jumble a rowMarks list - */ -static void -JumbleRowMarks(JumbleState *jstate, List *rowMarks) -{ - ListCell *lc; - - foreach(lc, rowMarks) - { - RowMarkClause *rowmark = lfirst_node(RowMarkClause, lc); - - if (!rowmark->pushedDown) - { - APP_JUMB(rowmark->rti); - APP_JUMB(rowmark->strength); - APP_JUMB(rowmark->waitPolicy); - } - } -} - -/* - * Jumble an expression tree - * - * In general this function should handle all the same node types that - * expression_tree_walker() does, and therefore it's coded to be as parallel - * to that function as possible. However, since we are only invoked on - * queries immediately post-parse-analysis, we need not handle node types - * that only appear in planning. - * - * Note: the reason we don't simply use expression_tree_walker() is that the - * point of that function is to support tree walkers that don't care about - * most tree node types, but here we care about all types. We should complain - * about any unrecognized node type. - */ -static void -JumbleExpr(JumbleState *jstate, Node *node) -{ - ListCell *temp; - - if (node == NULL) - return; - - /* Guard against stack overflow due to overly complex expressions */ - check_stack_depth(); - - /* - * We always emit the node's NodeTag, then any additional fields that are - * considered significant, and then we recurse to any child nodes. - */ - APP_JUMB(node->type); - - switch (nodeTag(node)) - { - case T_Var: - { - Var *var = (Var *) node; - - APP_JUMB(var->varno); - APP_JUMB(var->varattno); - APP_JUMB(var->varlevelsup); - } - break; - case T_Const: - { - Const *c = (Const *) node; - - /* We jumble only the constant's type, not its value */ - APP_JUMB(c->consttype); - /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); - } - break; - case T_Param: - { - Param *p = (Param *) node; - - APP_JUMB(p->paramkind); - APP_JUMB(p->paramid); - APP_JUMB(p->paramtype); - /* Also, track the highest external Param id */ - if (p->paramkind == PARAM_EXTERN && - p->paramid > jstate->highest_extern_param_id) - jstate->highest_extern_param_id = p->paramid; - } - break; - case T_Aggref: - { - Aggref *expr = (Aggref *) node; - - APP_JUMB(expr->aggfnoid); - JumbleExpr(jstate, (Node *) expr->aggdirectargs); - JumbleExpr(jstate, (Node *) expr->args); - JumbleExpr(jstate, (Node *) expr->aggorder); - JumbleExpr(jstate, (Node *) expr->aggdistinct); - JumbleExpr(jstate, (Node *) expr->aggfilter); - } - break; - case T_GroupingFunc: - { - GroupingFunc *grpnode = (GroupingFunc *) node; - - JumbleExpr(jstate, (Node *) grpnode->refs); - APP_JUMB(grpnode->agglevelsup); - } - break; - case T_WindowFunc: - { - WindowFunc *expr = (WindowFunc *) node; - - APP_JUMB(expr->winfnoid); - APP_JUMB(expr->winref); - JumbleExpr(jstate, (Node *) expr->args); - JumbleExpr(jstate, (Node *) expr->aggfilter); - } - break; - case T_SubscriptingRef: - { - SubscriptingRef *sbsref = (SubscriptingRef *) node; - - JumbleExpr(jstate, (Node *) sbsref->refupperindexpr); - JumbleExpr(jstate, (Node *) sbsref->reflowerindexpr); - JumbleExpr(jstate, (Node *) sbsref->refexpr); - JumbleExpr(jstate, (Node *) sbsref->refassgnexpr); - } - break; - case T_FuncExpr: - { - FuncExpr *expr = (FuncExpr *) node; - - APP_JUMB(expr->funcid); - JumbleExpr(jstate, (Node *) expr->args); - } - break; - case T_NamedArgExpr: - { - NamedArgExpr *nae = (NamedArgExpr *) node; - - APP_JUMB(nae->argnumber); - JumbleExpr(jstate, (Node *) nae->arg); - } - break; - case T_OpExpr: - case T_DistinctExpr: /* struct-equivalent to OpExpr */ - case T_NullIfExpr: /* struct-equivalent to OpExpr */ - { - OpExpr *expr = (OpExpr *) node; - - APP_JUMB(expr->opno); - JumbleExpr(jstate, (Node *) expr->args); - } - break; - case T_ScalarArrayOpExpr: - { - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node; - - APP_JUMB(expr->opno); - APP_JUMB(expr->useOr); - JumbleExpr(jstate, (Node *) expr->args); - } - break; - case T_BoolExpr: - { - BoolExpr *expr = (BoolExpr *) node; - - APP_JUMB(expr->boolop); - JumbleExpr(jstate, (Node *) expr->args); - } - break; - case T_SubLink: - { - SubLink *sublink = (SubLink *) node; - - APP_JUMB(sublink->subLinkType); - APP_JUMB(sublink->subLinkId); - JumbleExpr(jstate, (Node *) sublink->testexpr); - JumbleQueryInternal(jstate, castNode(Query, sublink->subselect)); - } - break; - case T_FieldSelect: - { - FieldSelect *fs = (FieldSelect *) node; - - APP_JUMB(fs->fieldnum); - JumbleExpr(jstate, (Node *) fs->arg); - } - break; - case T_FieldStore: - { - FieldStore *fstore = (FieldStore *) node; - - JumbleExpr(jstate, (Node *) fstore->arg); - JumbleExpr(jstate, (Node *) fstore->newvals); - } - break; - case T_RelabelType: - { - RelabelType *rt = (RelabelType *) node; - - APP_JUMB(rt->resulttype); - JumbleExpr(jstate, (Node *) rt->arg); - } - break; - case T_CoerceViaIO: - { - CoerceViaIO *cio = (CoerceViaIO *) node; - - APP_JUMB(cio->resulttype); - JumbleExpr(jstate, (Node *) cio->arg); - } - break; - case T_ArrayCoerceExpr: - { - ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node; - - APP_JUMB(acexpr->resulttype); - JumbleExpr(jstate, (Node *) acexpr->arg); - JumbleExpr(jstate, (Node *) acexpr->elemexpr); - } - break; - case T_ConvertRowtypeExpr: - { - ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node; - - APP_JUMB(crexpr->resulttype); - JumbleExpr(jstate, (Node *) crexpr->arg); - } - break; - case T_CollateExpr: - { - CollateExpr *ce = (CollateExpr *) node; - - APP_JUMB(ce->collOid); - JumbleExpr(jstate, (Node *) ce->arg); - } - break; - case T_CaseExpr: - { - CaseExpr *caseexpr = (CaseExpr *) node; - - JumbleExpr(jstate, (Node *) caseexpr->arg); - foreach(temp, caseexpr->args) - { - CaseWhen *when = lfirst_node(CaseWhen, temp); - - JumbleExpr(jstate, (Node *) when->expr); - JumbleExpr(jstate, (Node *) when->result); - } - JumbleExpr(jstate, (Node *) caseexpr->defresult); - } - break; - case T_CaseTestExpr: - { - CaseTestExpr *ct = (CaseTestExpr *) node; - - APP_JUMB(ct->typeId); - } - break; - case T_ArrayExpr: - JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements); - break; - case T_RowExpr: - JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args); - break; - case T_RowCompareExpr: - { - RowCompareExpr *rcexpr = (RowCompareExpr *) node; - - APP_JUMB(rcexpr->rctype); - JumbleExpr(jstate, (Node *) rcexpr->largs); - JumbleExpr(jstate, (Node *) rcexpr->rargs); - } - break; - case T_CoalesceExpr: - JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args); - break; - case T_MinMaxExpr: - { - MinMaxExpr *mmexpr = (MinMaxExpr *) node; - - APP_JUMB(mmexpr->op); - JumbleExpr(jstate, (Node *) mmexpr->args); - } - break; - case T_XmlExpr: - { - XmlExpr *xexpr = (XmlExpr *) node; - - APP_JUMB(xexpr->op); - JumbleExpr(jstate, (Node *) xexpr->named_args); - JumbleExpr(jstate, (Node *) xexpr->args); - } - break; - case T_NullTest: - { - NullTest *nt = (NullTest *) node; - - APP_JUMB(nt->nulltesttype); - JumbleExpr(jstate, (Node *) nt->arg); - } - break; - case T_BooleanTest: - { - BooleanTest *bt = (BooleanTest *) node; - - APP_JUMB(bt->booltesttype); - JumbleExpr(jstate, (Node *) bt->arg); - } - break; - case T_CoerceToDomain: - { - CoerceToDomain *cd = (CoerceToDomain *) node; - - APP_JUMB(cd->resulttype); - JumbleExpr(jstate, (Node *) cd->arg); - } - break; - case T_CoerceToDomainValue: - { - CoerceToDomainValue *cdv = (CoerceToDomainValue *) node; - - APP_JUMB(cdv->typeId); - } - break; - case T_SetToDefault: - { - SetToDefault *sd = (SetToDefault *) node; - - APP_JUMB(sd->typeId); - } - break; - case T_CurrentOfExpr: - { - CurrentOfExpr *ce = (CurrentOfExpr *) node; - - APP_JUMB(ce->cvarno); - if (ce->cursor_name) - APP_JUMB_STRING(ce->cursor_name); - APP_JUMB(ce->cursor_param); - } - break; - case T_NextValueExpr: - { - NextValueExpr *nve = (NextValueExpr *) node; - - APP_JUMB(nve->seqid); - APP_JUMB(nve->typeId); - } - break; - case T_InferenceElem: - { - InferenceElem *ie = (InferenceElem *) node; - - APP_JUMB(ie->infercollid); - APP_JUMB(ie->inferopclass); - JumbleExpr(jstate, ie->expr); - } - break; - case T_TargetEntry: - { - TargetEntry *tle = (TargetEntry *) node; - - APP_JUMB(tle->resno); - APP_JUMB(tle->ressortgroupref); - JumbleExpr(jstate, (Node *) tle->expr); - } - break; - case T_RangeTblRef: - { - RangeTblRef *rtr = (RangeTblRef *) node; - - APP_JUMB(rtr->rtindex); - } - break; - case T_JoinExpr: - { - JoinExpr *join = (JoinExpr *) node; - - APP_JUMB(join->jointype); - APP_JUMB(join->isNatural); - APP_JUMB(join->rtindex); - JumbleExpr(jstate, join->larg); - JumbleExpr(jstate, join->rarg); - JumbleExpr(jstate, join->quals); - } - break; - case T_FromExpr: - { - FromExpr *from = (FromExpr *) node; - - JumbleExpr(jstate, (Node *) from->fromlist); - JumbleExpr(jstate, from->quals); - } - break; - case T_OnConflictExpr: - { - OnConflictExpr *conf = (OnConflictExpr *) node; - - APP_JUMB(conf->action); - JumbleExpr(jstate, (Node *) conf->arbiterElems); - JumbleExpr(jstate, conf->arbiterWhere); - JumbleExpr(jstate, (Node *) conf->onConflictSet); - JumbleExpr(jstate, conf->onConflictWhere); - APP_JUMB(conf->constraint); - APP_JUMB(conf->exclRelIndex); - JumbleExpr(jstate, (Node *) conf->exclRelTlist); - } - break; - case T_MergeAction: - { - MergeAction *mergeaction = (MergeAction *) node; - - APP_JUMB(mergeaction->matched); - APP_JUMB(mergeaction->commandType); - JumbleExpr(jstate, mergeaction->qual); - JumbleExpr(jstate, (Node *) mergeaction->targetList); - } - break; - case T_List: - foreach(temp, (List *) node) - { - JumbleExpr(jstate, (Node *) lfirst(temp)); - } - break; - case T_IntList: - foreach(temp, (List *) node) - { - APP_JUMB(lfirst_int(temp)); - } - break; - case T_SortGroupClause: - { - SortGroupClause *sgc = (SortGroupClause *) node; - - APP_JUMB(sgc->tleSortGroupRef); - APP_JUMB(sgc->eqop); - APP_JUMB(sgc->sortop); - APP_JUMB(sgc->nulls_first); - } - break; - case T_GroupingSet: - { - GroupingSet *gsnode = (GroupingSet *) node; - - JumbleExpr(jstate, (Node *) gsnode->content); - } - break; - case T_WindowClause: - { - WindowClause *wc = (WindowClause *) node; - - APP_JUMB(wc->winref); - APP_JUMB(wc->frameOptions); - JumbleExpr(jstate, (Node *) wc->partitionClause); - JumbleExpr(jstate, (Node *) wc->orderClause); - JumbleExpr(jstate, wc->startOffset); - JumbleExpr(jstate, wc->endOffset); - } - break; - case T_CommonTableExpr: - { - CommonTableExpr *cte = (CommonTableExpr *) node; - - /* we store the string name because RTE_CTE RTEs need it */ - APP_JUMB_STRING(cte->ctename); - APP_JUMB(cte->ctematerialized); - JumbleQueryInternal(jstate, castNode(Query, cte->ctequery)); - } - break; - case T_SetOperationStmt: - { - SetOperationStmt *setop = (SetOperationStmt *) node; - - APP_JUMB(setop->op); - APP_JUMB(setop->all); - JumbleExpr(jstate, setop->larg); - JumbleExpr(jstate, setop->rarg); - } - break; - case T_RangeTblFunction: - { - RangeTblFunction *rtfunc = (RangeTblFunction *) node; - - JumbleExpr(jstate, rtfunc->funcexpr); - } - break; - case T_TableFunc: - { - TableFunc *tablefunc = (TableFunc *) node; - - JumbleExpr(jstate, tablefunc->docexpr); - JumbleExpr(jstate, tablefunc->rowexpr); - JumbleExpr(jstate, (Node *) tablefunc->colexprs); - } - break; - case T_TableSampleClause: - { - TableSampleClause *tsc = (TableSampleClause *) node; - - APP_JUMB(tsc->tsmhandler); - JumbleExpr(jstate, (Node *) tsc->args); - JumbleExpr(jstate, (Node *) tsc->repeatable); - } - break; - default: - /* Only a warning, since we can stumble along anyway */ - elog(WARNING, "unrecognized node type: %d", - (int) nodeTag(node)); - break; - } -} - /* * Record location of constant within query string of query tree * that is currently being walked. @@ -859,3 +204,154 @@ RecordConstLocation(JumbleState *jstate, int location) jstate->clocations_count++; } } + +#define JUMBLE_NODE(item) \ + _jumbleNode(jstate, (Node *) expr->item) +#define JUMBLE_LOCATION(location) \ + RecordConstLocation(jstate, expr->location) +#define JUMBLE_FIELD(item) \ + AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) +#define JUMBLE_FIELD_SINGLE(item) \ + AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) +#define JUMBLE_STRING(str) \ +do { \ + if (expr->str) \ + AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ +} while(0) + +#include "queryjumblefuncs.funcs.c" + +static void +_jumbleNode(JumbleState *jstate, Node *node) +{ + Node *expr = node; + + if (expr == NULL) + return; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + JUMBLE_FIELD(type); + + switch (nodeTag(expr)) + { +#include "queryjumblefuncs.switch.c" + + case T_List: + case T_IntList: + case T_OidList: + case T_XidList: + _jumbleList(jstate, expr); + break; + + default: + /* Only a warning, since we can stumble along anyway */ + elog(WARNING, "unrecognized node type: %d", + (int) nodeTag(expr)); + break; + } + + /* Special cases to handle outside the automated code */ + switch (nodeTag(expr)) + { + case T_Param: + { + Param *p = (Param *) node; + + /* + * Update the highest Param id seen, in order to start + * normalization correctly. + */ + if (p->paramkind == PARAM_EXTERN && + p->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = p->paramid; + } + break; + default: + break; + } +} + +static void +_jumbleList(JumbleState *jstate, Node *node) +{ + List *expr = (List *) node; + ListCell *l; + + switch (expr->type) + { + case T_List: + foreach(l, expr) + _jumbleNode(jstate, lfirst(l)); + break; + case T_IntList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_int(l)); + break; + case T_OidList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_oid(l)); + break; + case T_XidList: + foreach(l, expr) + JUMBLE_FIELD_SINGLE(lfirst_xid(l)); + break; + default: + elog(ERROR, "unrecognized list node type: %d", + (int) expr->type); + return; + } +} + +static void +_jumbleRangeTblEntry(JumbleState *jstate, Node *node) +{ + RangeTblEntry *expr = (RangeTblEntry *) node; + + JUMBLE_FIELD(rtekind); + switch (expr->rtekind) + { + case RTE_RELATION: + JUMBLE_FIELD(relid); + JUMBLE_NODE(tablesample); + JUMBLE_FIELD(inh); + break; + case RTE_SUBQUERY: + JUMBLE_NODE(subquery); + break; + case RTE_JOIN: + JUMBLE_FIELD(jointype); + break; + case RTE_FUNCTION: + JUMBLE_NODE(functions); + break; + case RTE_TABLEFUNC: + JUMBLE_NODE(tablefunc); + break; + case RTE_VALUES: + JUMBLE_NODE(values_lists); + break; + case RTE_CTE: + + /* + * Depending on the CTE name here isn't ideal, but it's the only + * info we have to identify the referenced WITH item. + */ + JUMBLE_STRING(ctename); + JUMBLE_FIELD(ctelevelsup); + break; + case RTE_NAMEDTUPLESTORE: + JUMBLE_STRING(enrname); + break; + case RTE_RESULT: + break; + default: + elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind); + break; + } +} -- 2.39.0
From a55d0adb5bbd6bd09e0b27d3b1a2b0ed956eb7b0 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Wed, 25 Jan 2023 08:47:40 +0900 Subject: [PATCH v7 4/4] Add GUC utility_query_id This GUC has two modes to control the computation method of query IDs for utilities: - 'string', the default, to hash the string query. - 'jumble', to use the parsed tree. --- src/include/nodes/queryjumble.h | 7 ++ src/backend/nodes/queryjumblefuncs.c | 81 ++++++++++++++----- src/backend/utils/misc/guc_tables.c | 16 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + doc/src/sgml/config.sgml | 31 +++++++ .../expected/pg_stat_statements.out | 31 +++++++ .../sql/pg_stat_statements.sql | 17 ++++ 7 files changed, 164 insertions(+), 20 deletions(-) diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 204b8f74fd..261aea6bcf 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -59,8 +59,15 @@ enum ComputeQueryIdType COMPUTE_QUERY_ID_REGRESS }; +enum UtilityQueryIdType +{ + UTILITY_QUERY_ID_STRING, + UTILITY_QUERY_ID_JUMBLE +}; + /* GUC parameters */ extern PGDLLIMPORT int compute_query_id; +extern PGDLLIMPORT int utility_query_id; extern const char *CleanQuerytext(const char *query, int *location, int *len); diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 16fdf7164a..d55adf5020 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -41,12 +41,15 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +int utility_query_id = UTILITY_QUERY_ID_STRING; /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); +static uint64 compute_utility_query_id(const char *query_text, + int query_location, int query_len); static void RecordConstLocation(JumbleState *jstate, int location); static void _jumbleNode(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); @@ -102,29 +105,39 @@ JumbleQuery(Query *query, const char *querytext) Assert(IsQueryIdEnabled()); - jstate = (JumbleState *) palloc(sizeof(JumbleState)); + if (query->utilityStmt && + utility_query_id == UTILITY_QUERY_ID_STRING) + { + query->queryId = compute_utility_query_id(querytext, + query->stmt_location, + query->stmt_len); + } + else + { + jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; - jstate->clocations_buf_size = 32; - jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); - jstate->clocations_count = 0; - jstate->highest_extern_param_id = 0; + /* Set up workspace for query jumbling */ + jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate->jumble_len = 0; + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + jstate->clocations_count = 0; + jstate->highest_extern_param_id = 0; - /* Compute query ID and mark the Query node with it */ - _jumbleNode(jstate, (Node *) query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + /* Compute query ID and mark the Query node with it */ + _jumbleNode(jstate, (Node *) query); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == UINT64CONST(0)) - query->queryId = UINT64CONST(1); + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case. + */ + if (query->queryId == UINT64CONST(0)) + query->queryId = UINT64CONST(1); + } return jstate; } @@ -142,6 +155,34 @@ EnableQueryId(void) query_id_enabled = true; } +/* + * Compute a query identifier for the given utility query string. + */ +static uint64 +compute_utility_query_id(const char *query_text, int query_location, int query_len) +{ + uint64 queryId; + const char *sql; + + /* + * Confine our attention to the relevant part of the string, if the query + * is a portion of a multi-statement source string. + */ + sql = CleanQuerytext(query_text, &query_location, &query_len); + + queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql, + query_len, 0)); + + /* + * If we are unlucky enough to get a hash of zero(invalid), use queryID as + * 2 instead, queryID 1 is already in use for normal statements. + */ + if (queryId == UINT64CONST(0)) + queryId = UINT64CONST(2); + + return queryId; +} + /* * AppendJumble: Append a value that is substantive in a given query to * the current jumble. diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4ac808ed22..97619c4e1d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -294,6 +294,12 @@ static const struct config_enum_entry compute_query_id_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry utility_query_id_options[] = { + {"string", UTILITY_QUERY_ID_STRING, false}, + {"jumble", UTILITY_QUERY_ID_JUMBLE, false}, + {NULL, 0, false} +}; + /* * Although only "on", "off", and "partition" are documented, we * accept all the likely variants of "on" and "off". @@ -4574,6 +4580,16 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"utility_query_id", PGC_SUSET, STATS_MONITORING, + gettext_noop("Controls method computing query ID for utilities."), + NULL + }, + &utility_query_id, + UTILITY_QUERY_ID_STRING, utility_query_id_options, + NULL, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index d06074b86f..bbf95af59d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -623,6 +623,7 @@ # - Monitoring - #compute_query_id = auto +#utility_query_id = string # string, jumble #log_statement_stats = off #log_parser_stats = off #log_planner_stats = off diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f985afc009..4ccd148471 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8241,6 +8241,37 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-utility-query-id" xreflabel="utility_query_id"> + <term><varname>utility_query_id</varname> (<type>enum</type>) + <indexterm> + <primary><varname>utility_query_id</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Controls the method used to compute the query identifier of a utility + query. Valid values are <literal>string</literal> to use a hash of the + query string and <literal>jumble</literal> to compute the query + identifier depending on the parsed tree of the utility query. + The default is <literal>string</literal>. + </para> + <para> + <literal>jumble</literal> is more costly than <literal>string</literal> + as the computation of the query identifier walks through the + post-parse-analysis representation of the queries for utility queries. + However, <literal>jumble</literal> is able to apply normalization + to the queries computed, meaning that queries written differently + but having the same query representation may be able to use the same + identifier. + For example, <literal>BEGIN;</literal> and <literal>begin;</literal> + will have the same query identifier under <literal>jumble</literal> as + both queries have the same query representation. The query identifier + would be different under <literal>string</literal>, because the query + strings are different. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-log-statement-stats"> <term><varname>log_statement_stats</varname> (<type>boolean</type>) <indexterm> diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 9ac5c87c3a..8bdf8beec3 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -554,6 +554,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); pg_stat_statements_reset -------------------------- @@ -592,6 +593,36 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 (9 rows) +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. +Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls | rows +------------------------------------------------------------------------------+-------+------ + Begin | 2 | 0 + Commit | 2 | 0 + Create Table test_utility_query (a int) | 2 | 0 + Drop Table test_utility_query | 2 | 0 + SELECT pg_stat_statements_reset() | 1 | 1 + SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 + SET utility_query_id = 'jumble' | 1 | 0 +(7 rows) + +RESET utility_query_id; -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 8f5c866225..81d663f81c 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -258,6 +258,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); SELECT 1; @@ -272,6 +273,22 @@ DROP FUNCTION PLUS_TWO(INTEGER); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset(); +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. +Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; + +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +RESET utility_query_id; + -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, -- 2.39.0
signature.asc
Description: PGP signature