I wrote: > Robert Haas <robertmh...@gmail.com> writes: >> Yeah, maybe you're right. But I'd still prefer to see us break the >> ABI and do this just in 9.0 rather than changing 8.4.
> OK, I can live with that. I'll take a look at it shortly. Proposed patch attached (compiles, untested as yet). regards, tom lane
Index: src/backend/commands/analyze.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/commands/analyze.c,v retrieving revision 1.152 diff -c -r1.152 analyze.c *** src/backend/commands/analyze.c 26 Feb 2010 02:00:37 -0000 1.152 --- src/backend/commands/analyze.c 1 Aug 2010 19:56:12 -0000 *************** *** 92,98 **** AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, MemoryContext col_context); ! static VacAttrStats *examine_attribute(Relation onerel, int attnum); static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); static double random_fract(void); --- 92,99 ---- AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, MemoryContext col_context); ! static VacAttrStats *examine_attribute(Relation onerel, int attnum, ! Node *index_expr); static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); static double random_fract(void); *************** *** 339,345 **** (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column \"%s\" of relation \"%s\" does not exist", col, RelationGetRelationName(onerel)))); ! vacattrstats[tcnt] = examine_attribute(onerel, i); if (vacattrstats[tcnt] != NULL) tcnt++; } --- 340,346 ---- (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column \"%s\" of relation \"%s\" does not exist", col, RelationGetRelationName(onerel)))); ! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); if (vacattrstats[tcnt] != NULL) tcnt++; } *************** *** 353,359 **** tcnt = 0; for (i = 1; i <= attr_cnt; i++) { ! vacattrstats[tcnt] = examine_attribute(onerel, i); if (vacattrstats[tcnt] != NULL) tcnt++; } --- 354,360 ---- tcnt = 0; for (i = 1; i <= attr_cnt; i++) { ! vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); if (vacattrstats[tcnt] != NULL) tcnt++; } *************** *** 407,427 **** elog(ERROR, "too few entries in indexprs list"); indexkey = (Node *) lfirst(indexpr_item); indexpr_item = lnext(indexpr_item); - - /* - * Can't analyze if the opclass uses a storage type - * different from the expression result type. We'd get - * confused because the type shown in pg_attribute for - * the index column doesn't match what we are getting - * from the expression. Perhaps this can be fixed - * someday, but for now, punt. - */ - if (exprType(indexkey) != - Irel[ind]->rd_att->attrs[i]->atttypid) - continue; - thisdata->vacattrstats[tcnt] = ! examine_attribute(Irel[ind], i + 1); if (thisdata->vacattrstats[tcnt] != NULL) { tcnt++; --- 408,415 ---- elog(ERROR, "too few entries in indexprs list"); indexkey = (Node *) lfirst(indexpr_item); indexpr_item = lnext(indexpr_item); thisdata->vacattrstats[tcnt] = ! examine_attribute(Irel[ind], i + 1, indexkey); if (thisdata->vacattrstats[tcnt] != NULL) { tcnt++; *************** *** 802,810 **** * * Determine whether the column is analyzable; if so, create and initialize * a VacAttrStats struct for it. If not, return NULL. */ static VacAttrStats * ! examine_attribute(Relation onerel, int attnum) { Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1]; HeapTuple typtuple; --- 790,801 ---- * * Determine whether the column is analyzable; if so, create and initialize * a VacAttrStats struct for it. If not, return NULL. + * + * If index_expr isn't NULL, then we're trying to analyze an expression index, + * and index_expr is the expression tree representing the column's data. */ static VacAttrStats * ! examine_attribute(Relation onerel, int attnum, Node *index_expr) { Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1]; HeapTuple typtuple; *************** *** 827,835 **** stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); ! typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid)); if (!HeapTupleIsValid(typtuple)) ! elog(ERROR, "cache lookup failed for type %u", attr->atttypid); stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type)); memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type)); ReleaseSysCache(typtuple); --- 818,847 ---- stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); ! ! /* ! * When analyzing an expression index, believe the expression tree's type ! * not the column datatype --- the latter might be the opckeytype storage ! * type of the opclass, which is not interesting for our purposes. (Note: ! * if we did anything with non-expression index columns, we'd need to ! * figure out where to get the correct type info from, but for now that's ! * not a problem.) It's not clear whether anyone will care about the ! * typmod, but we store that too just in case. ! */ ! if (index_expr) ! { ! stats->attrtypid = exprType(index_expr); ! stats->attrtypmod = exprTypmod(index_expr); ! } ! else ! { ! stats->attrtypid = attr->atttypid; ! stats->attrtypmod = attr->atttypmod; ! } ! ! typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid)); if (!HeapTupleIsValid(typtuple)) ! elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type)); memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type)); ReleaseSysCache(typtuple); *************** *** 838,849 **** /* * The fields describing the stats->stavalues[n] element types default to ! * the type of the field being analyzed, but the type-specific typanalyze * function can change them if it wants to store something else. */ for (i = 0; i < STATISTIC_NUM_SLOTS; i++) { ! stats->statypid[i] = stats->attr->atttypid; stats->statyplen[i] = stats->attrtype->typlen; stats->statypbyval[i] = stats->attrtype->typbyval; stats->statypalign[i] = stats->attrtype->typalign; --- 850,861 ---- /* * The fields describing the stats->stavalues[n] element types default to ! * the type of the data being analyzed, but the type-specific typanalyze * function can change them if it wants to store something else. */ for (i = 0; i < STATISTIC_NUM_SLOTS; i++) { ! stats->statypid[i] = stats->attrtypid; stats->statyplen[i] = stats->attrtype->typlen; stats->statypbyval[i] = stats->attrtype->typbyval; stats->statypalign[i] = stats->attrtype->typalign; *************** *** 1780,1786 **** attr->attstattarget = default_statistics_target; /* Look for default "<" and "=" operators for column's type */ ! get_sort_group_operators(attr->atttypid, false, false, false, <opr, &eqopr, NULL); --- 1792,1798 ---- attr->attstattarget = default_statistics_target; /* Look for default "<" and "=" operators for column's type */ ! get_sort_group_operators(stats->attrtypid, false, false, false, <opr, &eqopr, NULL); *************** *** 1860,1869 **** int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; ! bool is_varlena = (!stats->attr->attbyval && ! stats->attr->attlen == -1); ! bool is_varwidth = (!stats->attr->attbyval && ! stats->attr->attlen < 0); FmgrInfo f_cmpeq; typedef struct { --- 1872,1881 ---- int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; ! bool is_varlena = (!stats->attrtype->typbyval && ! stats->attrtype->typlen == -1); ! bool is_varwidth = (!stats->attrtype->typbyval && ! stats->attrtype->typlen < 0); FmgrInfo f_cmpeq; typedef struct { *************** *** 2126,2133 **** for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(track[i].value, ! stats->attr->attbyval, ! stats->attr->attlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); --- 2138,2145 ---- for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(track[i].value, ! stats->attrtype->typbyval, ! stats->attrtype->typlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); *************** *** 2184,2193 **** int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; ! bool is_varlena = (!stats->attr->attbyval && ! stats->attr->attlen == -1); ! bool is_varwidth = (!stats->attr->attbyval && ! stats->attr->attlen < 0); double corr_xysum; Oid cmpFn; int cmpFlags; --- 2196,2205 ---- int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; ! bool is_varlena = (!stats->attrtype->typbyval && ! stats->attrtype->typlen == -1); ! bool is_varwidth = (!stats->attrtype->typbyval && ! stats->attrtype->typlen < 0); double corr_xysum; Oid cmpFn; int cmpFlags; *************** *** 2476,2483 **** for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(values[track[i].first].value, ! stats->attr->attbyval, ! stats->attr->attlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); --- 2488,2495 ---- for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(values[track[i].first].value, ! stats->attrtype->typbyval, ! stats->attrtype->typlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); *************** *** 2583,2590 **** for (i = 0; i < num_hist; i++) { hist_values[i] = datumCopy(values[pos].value, ! stats->attr->attbyval, ! stats->attr->attlen); pos += delta; posfrac += deltafrac; if (posfrac >= (num_hist - 1)) --- 2595,2602 ---- for (i = 0; i < num_hist; i++) { hist_values[i] = datumCopy(values[pos].value, ! stats->attrtype->typbyval, ! stats->attrtype->typlen); pos += delta; posfrac += deltafrac; if (posfrac >= (num_hist - 1)) Index: src/include/commands/vacuum.h =================================================================== RCS file: /cvsroot/pgsql/src/include/commands/vacuum.h,v retrieving revision 1.89 diff -c -r1.89 vacuum.h *** src/include/commands/vacuum.h 9 Feb 2010 21:43:30 -0000 1.89 --- src/include/commands/vacuum.h 1 Aug 2010 19:56:12 -0000 *************** *** 62,70 **** /* * These fields are set up by the main ANALYZE code before invoking the * type-specific typanalyze function. */ Form_pg_attribute attr; /* copy of pg_attribute row for column */ ! Form_pg_type attrtype; /* copy of pg_type row for column */ MemoryContext anl_context; /* where to save long-lived data */ /* --- 62,78 ---- /* * These fields are set up by the main ANALYZE code before invoking the * type-specific typanalyze function. + * + * Note: do not assume that the data being analyzed has the same datatype + * shown in attr, ie do not trust attr->atttypid, attlen, etc. This is + * because some index opclasses store a different type than the underlying + * column/expression. Instead use attrtypid, attrtypmod, and attrtype for + * information about the datatype being fed to the typanalyze function. */ Form_pg_attribute attr; /* copy of pg_attribute row for column */ ! Oid attrtypid; /* type of data being analyzed */ ! int32 attrtypmod; /* typmod of data being analyzed */ ! Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ MemoryContext anl_context; /* where to save long-lived data */ /* *************** *** 95,104 **** /* * These fields describe the stavalues[n] element types. They will be ! * initialized to be the same as the column's that's underlying the slot, ! * but a custom typanalyze function might want to store an array of ! * something other than the analyzed column's elements. It should then ! * overwrite these fields. */ Oid statypid[STATISTIC_NUM_SLOTS]; int2 statyplen[STATISTIC_NUM_SLOTS]; --- 103,111 ---- /* * These fields describe the stavalues[n] element types. They will be ! * initialized to match attrtypid, but a custom typanalyze function might ! * want to store an array of something other than the analyzed column's ! * elements. It should then overwrite these fields. */ Oid statypid[STATISTIC_NUM_SLOTS]; int2 statyplen[STATISTIC_NUM_SLOTS];
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers