Jeff Janes <jeff.ja...@gmail.com> writes:
> On Fri, Nov 13, 2015 at 3:13 PM, Tom Lane <t...@sss.pgh.pa.us> wrote:
>> There's probably no reason not to do that, but I'd be much more interested
>> in eliminating the slowness to begin with ...

> I was thinking about that as well, but I don't think that would be
> back-patchable, at least not the way I was envisioning it.

> I was thinking of detecting bad cases (had to count to over 10 before
> finding a novel name, more than 10 times) and then switching from an
> object-local count, to a global count, for the numbers to add to the
> name.  But that would be a behavior change under some conditions.

I experimented with using a hash table to avoid the O(N^2) behavior.
This seems to work quite well, and I think it doesn't change the results
(leastwise, it does not break the regression tests).

It would be possible to do something similar to dodge the O(N^2) costs
in make_colname_unique/colname_is_unique; but it would be a lot messier,
and the tests I did suggest that it's fairly hard to get into a regime
where those functions are a huge part of the runtime anyway, because
we have limits on the numbers of columns.  So I'm inclined to leave that
alone.

                        regards, tom lane

diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 3388f7c..2b6de8f 100644
*** a/src/backend/utils/adt/ruleutils.c
--- b/src/backend/utils/adt/ruleutils.c
***************
*** 55,60 ****
--- 55,61 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/fmgroids.h"
+ #include "utils/hsearch.h"
  #include "utils/lsyscache.h"
  #include "utils/rel.h"
  #include "utils/ruleutils.h"
*************** typedef struct
*** 267,272 ****
--- 268,282 ----
  #define deparse_columns_fetch(rangetable_index, dpns) \
  	((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
  
+ /*
+  * Entry in set_rtable_names' hash table
+  */
+ typedef struct
+ {
+ 	char		name[NAMEDATALEN];		/* Hash key --- must be first */
+ 	int			counter;		/* Largest addition used so far for name */
+ } NameHashEntry;
+ 
  
  /* ----------
   * Global data
*************** static void print_function_rettype(Strin
*** 312,319 ****
  static void print_function_trftypes(StringInfo buf, HeapTuple proctup);
  static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
  				 Bitmapset *rels_used);
- static bool refname_is_unique(char *refname, deparse_namespace *dpns,
- 				  List *parent_namespaces);
  static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
  					  List *parent_namespaces);
  static void set_simple_column_names(deparse_namespace *dpns);
--- 322,327 ----
*************** static void
*** 2676,2690 ****
  set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
  				 Bitmapset *rels_used)
  {
  	ListCell   *lc;
- 	int			rtindex = 1;
  
  	dpns->rtable_names = NIL;
  	foreach(lc, dpns->rtable)
  	{
  		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
  		char	   *refname;
  
  		if (rels_used && !bms_is_member(rtindex, rels_used))
  		{
  			/* Ignore unreferenced RTE */
--- 2684,2744 ----
  set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
  				 Bitmapset *rels_used)
  {
+ 	HASHCTL		hash_ctl;
+ 	HTAB	   *names_hash;
+ 	NameHashEntry *hentry;
+ 	bool		found;
+ 	int			rtindex;
  	ListCell   *lc;
  
  	dpns->rtable_names = NIL;
+ 	/* nothing more to do if empty rtable */
+ 	if (dpns->rtable == NIL)
+ 		return;
+ 
+ 	/*
+ 	 * We use a hash table to hold known names, so that this process is O(N)
+ 	 * not O(N^2) for N names.
+ 	 */
+ 	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+ 	hash_ctl.keysize = NAMEDATALEN;
+ 	hash_ctl.entrysize = sizeof(NameHashEntry);
+ 	hash_ctl.hcxt = CurrentMemoryContext;
+ 	names_hash = hash_create("set_rtable_names names",
+ 							 list_length(dpns->rtable),
+ 							 &hash_ctl,
+ 							 HASH_ELEM | HASH_CONTEXT);
+ 	/* Preload the hash table with names appearing in parent_namespaces */
+ 	foreach(lc, parent_namespaces)
+ 	{
+ 		deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
+ 		ListCell   *lc2;
+ 
+ 		foreach(lc2, olddpns->rtable_names)
+ 		{
+ 			char	   *oldname = (char *) lfirst(lc2);
+ 
+ 			if (oldname == NULL)
+ 				continue;
+ 			hentry = (NameHashEntry *) hash_search(names_hash,
+ 												   oldname,
+ 												   HASH_ENTER,
+ 												   &found);
+ 			/* we do not complain about duplicate names in parent namespaces */
+ 			hentry->counter = 0;
+ 		}
+ 	}
+ 
+ 	/* Now we can scan the rtable */
+ 	rtindex = 1;
  	foreach(lc, dpns->rtable)
  	{
  		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
  		char	   *refname;
  
+ 		/* Just in case this takes an unreasonable amount of time ... */
+ 		CHECK_FOR_INTERRUPTS();
+ 
  		if (rels_used && !bms_is_member(rtindex, rels_used))
  		{
  			/* Ignore unreferenced RTE */
*************** set_rtable_names(deparse_namespace *dpns
*** 2712,2767 ****
  		}
  
  		/*
! 		 * If the selected name isn't unique, append digits to make it so
  		 */
! 		if (refname &&
! 			!refname_is_unique(refname, dpns, parent_namespaces))
  		{
! 			char	   *modname = (char *) palloc(strlen(refname) + 32);
! 			int			i = 0;
  
! 			do
  			{
! 				sprintf(modname, "%s_%d", refname, ++i);
! 			} while (!refname_is_unique(modname, dpns, parent_namespaces));
! 			refname = modname;
  		}
  
  		dpns->rtable_names = lappend(dpns->rtable_names, refname);
  		rtindex++;
  	}
- }
- 
- /*
-  * refname_is_unique: is refname distinct from all already-chosen RTE names?
-  */
- static bool
- refname_is_unique(char *refname, deparse_namespace *dpns,
- 				  List *parent_namespaces)
- {
- 	ListCell   *lc;
- 
- 	foreach(lc, dpns->rtable_names)
- 	{
- 		char	   *oldname = (char *) lfirst(lc);
- 
- 		if (oldname && strcmp(oldname, refname) == 0)
- 			return false;
- 	}
- 	foreach(lc, parent_namespaces)
- 	{
- 		deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
- 		ListCell   *lc2;
- 
- 		foreach(lc2, olddpns->rtable_names)
- 		{
- 			char	   *oldname = (char *) lfirst(lc2);
  
! 			if (oldname && strcmp(oldname, refname) == 0)
! 				return false;
! 		}
! 	}
! 	return true;
  }
  
  /*
--- 2766,2809 ----
  		}
  
  		/*
! 		 * If the selected name isn't unique, append digits to make it so, and
! 		 * make a new hash entry for it once we've got a unique name.
  		 */
! 		if (refname)
  		{
! 			hentry = (NameHashEntry *) hash_search(names_hash,
! 												   refname,
! 												   HASH_ENTER,
! 												   &found);
! 			if (found)
! 			{
! 				/* Name already in use, must choose a new one */
! 				char	   *modname = (char *) palloc(strlen(refname) + 32);
! 				NameHashEntry *hentry2;
  
! 				do
! 				{
! 					sprintf(modname, "%s_%d", refname, ++(hentry->counter));
! 					hentry2 = (NameHashEntry *) hash_search(names_hash,
! 															modname,
! 															HASH_ENTER,
! 															&found);
! 				} while (found);
! 				hentry2->counter = 0;	/* init new hash entry */
! 				refname = modname;
! 			}
! 			else
  			{
! 				/* Name not previously used, need only initialize hentry */
! 				hentry->counter = 0;
! 			}
  		}
  
  		dpns->rtable_names = lappend(dpns->rtable_names, refname);
  		rtindex++;
  	}
  
! 	hash_destroy(names_hash);
  }
  
  /*
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to