The attached patch implements a one-value pattern cache for ILIKE in the multi-byte encoding case. In the common case where the pattern is a constant, this cuts the number of calls to lower() almost in half (50%, less one call, because the pattern is then lowercased only once rather than once per row). Both my own testing and Guillaume Smet's show that this roughly halves the performance penalty we inflicted by using lower() in that case.

Is this sufficiently low risk to sneak into 8.3?

cheers

andrew
Index: src/backend/utils/adt/like.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/like.c,v
retrieving revision 1.71
diff -c -r1.71 like.c
*** src/backend/utils/adt/like.c	22 Sep 2007 03:58:34 -0000	1.71
--- src/backend/utils/adt/like.c	22 Sep 2007 12:16:23 -0000
***************
*** 139,144 ****
--- 139,149 ----
  			   *p;
  	int			slen,
  				plen;
+ 	static char patcache[512], lpatcache[512];
+     static int  patcachelen = 0, lpatcachelen = 0;
+ 
+ 	p = VARDATA_ANY(pat);
+ 	plen = VARSIZE_ANY_EXHDR(pat);
  
  	/* For efficiency reasons, in the single byte case we don't call
  	 * lower() on the pattern and text, but instead call to_lower on each
***************
*** 147,156 ****
  
  	if (pg_database_encoding_max_length() > 1)
  	{
  		/* lower's result is never packed, so OK to use old macros here */
- 		pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
- 		p = VARDATA(pat);
- 		plen = (VARSIZE(pat) - VARHDRSZ);
  		str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
  		s = VARDATA(str);
  		slen = (VARSIZE(str) - VARHDRSZ);
--- 152,192 ----
  
  	if (pg_database_encoding_max_length() > 1)
  	{
+ 		if (plen > 0  && plen == patcachelen && strncmp(p,patcache,plen) == 0)
+ 		{
+ 			p = lpatcache;
+ 			plen = lpatcachelen;
+ 		}
+ 		else
+ 		{
+ 			char *lp;
+ 			int   lplen;
+ 
+ 			pat = DatumGetTextP(DirectFunctionCall1(lower, 
+ 													PointerGetDatum(pat)));
+ 
+ 			/* lower's result is never packed, so OK to use old macros here */
+ 			lp = VARDATA(pat);
+ 			lplen = (VARSIZE(pat) - VARHDRSZ);
+ 
+ 			if (plen < 512 && lplen < 512)
+ 			{
+ 				patcachelen = plen;
+ 				lpatcachelen = lplen;
+ 				memcpy(patcache,p,plen);
+ 				memcpy(lpatcache,lp,lplen);
+ 			}
+ 			else
+ 			{
+ 				patcachelen = 0;
+ 				lpatcachelen = 0;
+ 			}
+ 
+ 			p = lp;
+ 			plen = lplen;
+ 		}
+ 
  		/* lower's result is never packed, so OK to use old macros here */
  		str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
  		s = VARDATA(str);
  		slen = (VARSIZE(str) - VARHDRSZ);
***************
*** 161,168 ****
  	}
  	else
  	{
- 		p = VARDATA_ANY(pat);
- 		plen = VARSIZE_ANY_EXHDR(pat);
  		s = VARDATA_ANY(str);
  		slen = VARSIZE_ANY_EXHDR(str);
  		return SB_IMatchText(s, slen, p, plen);
--- 197,202 ----
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to