I wrote:
> Robert Haas <robertmh...@gmail.com> writes:
>> Yeah, maybe you're right.  But I'd still prefer to see us break the
>> ABI and do this just in 9.0 rather than changing 8.4.

> OK, I can live with that.  I'll take a look at it shortly.

Proposed patch attached (compiles, untested as yet).

                        regards, tom lane

Index: src/backend/commands/analyze.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/commands/analyze.c,v
retrieving revision 1.152
diff -c -r1.152 analyze.c
*** src/backend/commands/analyze.c	26 Feb 2010 02:00:37 -0000	1.152
--- src/backend/commands/analyze.c	1 Aug 2010 19:56:12 -0000
***************
*** 92,98 ****
  					AnlIndexData *indexdata, int nindexes,
  					HeapTuple *rows, int numrows,
  					MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
  					int targrows, double *totalrows, double *totaldeadrows);
  static double random_fract(void);
--- 92,99 ----
  					AnlIndexData *indexdata, int nindexes,
  					HeapTuple *rows, int numrows,
  					MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum,
! 									   Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
  					int targrows, double *totalrows, double *totaldeadrows);
  static double random_fract(void);
***************
*** 339,345 ****
  						(errcode(ERRCODE_UNDEFINED_COLUMN),
  					errmsg("column \"%s\" of relation \"%s\" does not exist",
  						   col, RelationGetRelationName(onerel))));
! 			vacattrstats[tcnt] = examine_attribute(onerel, i);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
--- 340,346 ----
  						(errcode(ERRCODE_UNDEFINED_COLUMN),
  					errmsg("column \"%s\" of relation \"%s\" does not exist",
  						   col, RelationGetRelationName(onerel))));
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
***************
*** 353,359 ****
  		tcnt = 0;
  		for (i = 1; i <= attr_cnt; i++)
  		{
! 			vacattrstats[tcnt] = examine_attribute(onerel, i);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
--- 354,360 ----
  		tcnt = 0;
  		for (i = 1; i <= attr_cnt; i++)
  		{
! 			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
  			if (vacattrstats[tcnt] != NULL)
  				tcnt++;
  		}
***************
*** 407,427 ****
  							elog(ERROR, "too few entries in indexprs list");
  						indexkey = (Node *) lfirst(indexpr_item);
  						indexpr_item = lnext(indexpr_item);
- 
- 						/*
- 						 * Can't analyze if the opclass uses a storage type
- 						 * different from the expression result type. We'd get
- 						 * confused because the type shown in pg_attribute for
- 						 * the index column doesn't match what we are getting
- 						 * from the expression. Perhaps this can be fixed
- 						 * someday, but for now, punt.
- 						 */
- 						if (exprType(indexkey) !=
- 							Irel[ind]->rd_att->attrs[i]->atttypid)
- 							continue;
- 
  						thisdata->vacattrstats[tcnt] =
! 							examine_attribute(Irel[ind], i + 1);
  						if (thisdata->vacattrstats[tcnt] != NULL)
  						{
  							tcnt++;
--- 408,415 ----
  							elog(ERROR, "too few entries in indexprs list");
  						indexkey = (Node *) lfirst(indexpr_item);
  						indexpr_item = lnext(indexpr_item);
  						thisdata->vacattrstats[tcnt] =
! 							examine_attribute(Irel[ind], i + 1, indexkey);
  						if (thisdata->vacattrstats[tcnt] != NULL)
  						{
  							tcnt++;
***************
*** 802,810 ****
   *
   * Determine whether the column is analyzable; if so, create and initialize
   * a VacAttrStats struct for it.  If not, return NULL.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, int attnum)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
--- 790,801 ----
   *
   * Determine whether the column is analyzable; if so, create and initialize
   * a VacAttrStats struct for it.  If not, return NULL.
+  *
+  * If index_expr isn't NULL, then we're trying to analyze an expression index,
+  * and index_expr is the expression tree representing the column's data.
   */
  static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
  	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
  	HeapTuple	typtuple;
***************
*** 827,835 ****
  	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
  	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
  	memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
! 	typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
  	if (!HeapTupleIsValid(typtuple))
! 		elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
  	stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
  	memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
  	ReleaseSysCache(typtuple);
--- 818,847 ----
  	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
  	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
  	memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
! 
! 	/*
! 	 * When analyzing an expression index, believe the expression tree's type
! 	 * not the column datatype --- the latter might be the opckeytype storage
! 	 * type of the opclass, which is not interesting for our purposes.  (Note:
! 	 * if we did anything with non-expression index columns, we'd need to
! 	 * figure out where to get the correct type info from, but for now that's
! 	 * not a problem.)  It's not clear whether anyone will care about the
! 	 * typmod, but we store that too just in case.
! 	 */
! 	if (index_expr)
! 	{
! 		stats->attrtypid = exprType(index_expr);
! 		stats->attrtypmod = exprTypmod(index_expr);
! 	}
! 	else
! 	{
! 		stats->attrtypid = attr->atttypid;
! 		stats->attrtypmod = attr->atttypmod;
! 	}
! 
! 	typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
  	if (!HeapTupleIsValid(typtuple))
! 		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
  	stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
  	memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
  	ReleaseSysCache(typtuple);
***************
*** 838,849 ****
  
  	/*
  	 * The fields describing the stats->stavalues[n] element types default to
! 	 * the type of the field being analyzed, but the type-specific typanalyze
  	 * function can change them if it wants to store something else.
  	 */
  	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
  	{
! 		stats->statypid[i] = stats->attr->atttypid;
  		stats->statyplen[i] = stats->attrtype->typlen;
  		stats->statypbyval[i] = stats->attrtype->typbyval;
  		stats->statypalign[i] = stats->attrtype->typalign;
--- 850,861 ----
  
  	/*
  	 * The fields describing the stats->stavalues[n] element types default to
! 	 * the type of the data being analyzed, but the type-specific typanalyze
  	 * function can change them if it wants to store something else.
  	 */
  	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
  	{
! 		stats->statypid[i] = stats->attrtypid;
  		stats->statyplen[i] = stats->attrtype->typlen;
  		stats->statypbyval[i] = stats->attrtype->typbyval;
  		stats->statypalign[i] = stats->attrtype->typalign;
***************
*** 1780,1786 ****
  		attr->attstattarget = default_statistics_target;
  
  	/* Look for default "<" and "=" operators for column's type */
! 	get_sort_group_operators(attr->atttypid,
  							 false, false, false,
  							 &ltopr, &eqopr, NULL);
  
--- 1792,1798 ----
  		attr->attstattarget = default_statistics_target;
  
  	/* Look for default "<" and "=" operators for column's type */
! 	get_sort_group_operators(stats->attrtypid,
  							 false, false, false,
  							 &ltopr, &eqopr, NULL);
  
***************
*** 1860,1869 ****
  	int			nonnull_cnt = 0;
  	int			toowide_cnt = 0;
  	double		total_width = 0;
! 	bool		is_varlena = (!stats->attr->attbyval &&
! 							  stats->attr->attlen == -1);
! 	bool		is_varwidth = (!stats->attr->attbyval &&
! 							   stats->attr->attlen < 0);
  	FmgrInfo	f_cmpeq;
  	typedef struct
  	{
--- 1872,1881 ----
  	int			nonnull_cnt = 0;
  	int			toowide_cnt = 0;
  	double		total_width = 0;
! 	bool		is_varlena = (!stats->attrtype->typbyval &&
! 							  stats->attrtype->typlen == -1);
! 	bool		is_varwidth = (!stats->attrtype->typbyval &&
! 							   stats->attrtype->typlen < 0);
  	FmgrInfo	f_cmpeq;
  	typedef struct
  	{
***************
*** 2126,2133 ****
  			for (i = 0; i < num_mcv; i++)
  			{
  				mcv_values[i] = datumCopy(track[i].value,
! 										  stats->attr->attbyval,
! 										  stats->attr->attlen);
  				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
  			}
  			MemoryContextSwitchTo(old_context);
--- 2138,2145 ----
  			for (i = 0; i < num_mcv; i++)
  			{
  				mcv_values[i] = datumCopy(track[i].value,
! 										  stats->attrtype->typbyval,
! 										  stats->attrtype->typlen);
  				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
  			}
  			MemoryContextSwitchTo(old_context);
***************
*** 2184,2193 ****
  	int			nonnull_cnt = 0;
  	int			toowide_cnt = 0;
  	double		total_width = 0;
! 	bool		is_varlena = (!stats->attr->attbyval &&
! 							  stats->attr->attlen == -1);
! 	bool		is_varwidth = (!stats->attr->attbyval &&
! 							   stats->attr->attlen < 0);
  	double		corr_xysum;
  	Oid			cmpFn;
  	int			cmpFlags;
--- 2196,2205 ----
  	int			nonnull_cnt = 0;
  	int			toowide_cnt = 0;
  	double		total_width = 0;
! 	bool		is_varlena = (!stats->attrtype->typbyval &&
! 							  stats->attrtype->typlen == -1);
! 	bool		is_varwidth = (!stats->attrtype->typbyval &&
! 							   stats->attrtype->typlen < 0);
  	double		corr_xysum;
  	Oid			cmpFn;
  	int			cmpFlags;
***************
*** 2476,2483 ****
  			for (i = 0; i < num_mcv; i++)
  			{
  				mcv_values[i] = datumCopy(values[track[i].first].value,
! 										  stats->attr->attbyval,
! 										  stats->attr->attlen);
  				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
  			}
  			MemoryContextSwitchTo(old_context);
--- 2488,2495 ----
  			for (i = 0; i < num_mcv; i++)
  			{
  				mcv_values[i] = datumCopy(values[track[i].first].value,
! 										  stats->attrtype->typbyval,
! 										  stats->attrtype->typlen);
  				mcv_freqs[i] = (double) track[i].count / (double) samplerows;
  			}
  			MemoryContextSwitchTo(old_context);
***************
*** 2583,2590 ****
  			for (i = 0; i < num_hist; i++)
  			{
  				hist_values[i] = datumCopy(values[pos].value,
! 										   stats->attr->attbyval,
! 										   stats->attr->attlen);
  				pos += delta;
  				posfrac += deltafrac;
  				if (posfrac >= (num_hist - 1))
--- 2595,2602 ----
  			for (i = 0; i < num_hist; i++)
  			{
  				hist_values[i] = datumCopy(values[pos].value,
! 										   stats->attrtype->typbyval,
! 										   stats->attrtype->typlen);
  				pos += delta;
  				posfrac += deltafrac;
  				if (posfrac >= (num_hist - 1))
Index: src/include/commands/vacuum.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/commands/vacuum.h,v
retrieving revision 1.89
diff -c -r1.89 vacuum.h
*** src/include/commands/vacuum.h	9 Feb 2010 21:43:30 -0000	1.89
--- src/include/commands/vacuum.h	1 Aug 2010 19:56:12 -0000
***************
*** 62,70 ****
  	/*
  	 * These fields are set up by the main ANALYZE code before invoking the
  	 * type-specific typanalyze function.
  	 */
  	Form_pg_attribute attr;		/* copy of pg_attribute row for column */
! 	Form_pg_type attrtype;		/* copy of pg_type row for column */
  	MemoryContext anl_context;	/* where to save long-lived data */
  
  	/*
--- 62,78 ----
  	/*
  	 * These fields are set up by the main ANALYZE code before invoking the
  	 * type-specific typanalyze function.
+ 	 *
+ 	 * Note: do not assume that the data being analyzed has the same datatype
+ 	 * shown in attr, ie do not trust attr->atttypid, attlen, etc.  This is
+ 	 * because some index opclasses store a different type than the underlying
+ 	 * column/expression.  Instead use attrtypid, attrtypmod, and attrtype for
+ 	 * information about the datatype being fed to the typanalyze function.
  	 */
  	Form_pg_attribute attr;		/* copy of pg_attribute row for column */
! 	Oid			attrtypid;		/* type of data being analyzed */
! 	int32		attrtypmod;		/* typmod of data being analyzed */
! 	Form_pg_type attrtype;		/* copy of pg_type row for attrtypid */
  	MemoryContext anl_context;	/* where to save long-lived data */
  
  	/*
***************
*** 95,104 ****
  
  	/*
  	 * These fields describe the stavalues[n] element types. They will be
! 	 * initialized to be the same as the column's that's underlying the slot,
! 	 * but a custom typanalyze function might want to store an array of
! 	 * something other than the analyzed column's elements. It should then
! 	 * overwrite these fields.
  	 */
  	Oid			statypid[STATISTIC_NUM_SLOTS];
  	int2		statyplen[STATISTIC_NUM_SLOTS];
--- 103,111 ----
  
  	/*
  	 * These fields describe the stavalues[n] element types. They will be
! 	 * initialized to match attrtypid, but a custom typanalyze function might
! 	 * want to store an array of something other than the analyzed column's
! 	 * elements. It should then overwrite these fields.
  	 */
  	Oid			statypid[STATISTIC_NUM_SLOTS];
  	int2		statyplen[STATISTIC_NUM_SLOTS];
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to