diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
new file mode 100644
index c4dfdf2..72949e4
*** a/contrib/pg_trgm/trgm_regexp.c
--- b/contrib/pg_trgm/trgm_regexp.c
***************
*** 122,130 ****
   * thousands of trigrams would be slow, and would likely produce so many
   * false positives that we would have to traverse a large fraction of the
   * index, the graph is simplified further in a lossy fashion by removing
!  * color trigrams until the number of trigrams after expansion is below
!  * the MAX_TRGM_COUNT threshold.  When a color trigram is removed, the states
!  * connected by any arcs labelled with that trigram are merged.
   *
   * 4) Pack the graph into a compact representation
   * -----------------------------------------------
--- 122,142 ----
   * thousands of trigrams would be slow, and would likely produce so many
   * false positives that we would have to traverse a large fraction of the
   * index, the graph is simplified further in a lossy fashion by removing
!  * color trigrams. Also, trigrams itself have not equivalent value: some of
!  * them are more frequent and some of them are less frequent. Wishfully, we
!  * would like to know the distribution of trigrams, but we don't. But because
!  * of padding we for sure know that empty character is more frequent than
!  * others. Assuming that we divide trigrams into classes according to presence
!  * of whitespace. Their average frequencies relative to average frequency
!  * of trigrams without whitespace are calculated by real-life texts, called
!  * classes penalties and stored into "penalties" array. Then we introduce
!  * penalty of color trigram which is multiplication of number of its trigrams
!  * count and its class penalty. While removing color trigrams we're trying to
!  * minimize it's summary penalty, but the restriction is total number of
!  * trigrams. We would like to achieve WISH_TRGM_PENALTY summary penalty and
!  * assume fail if we don't reach MAX_TRGM_COUNT threshold for total number of
!  * trigrams. When a color trigram is removed, the states connected by any arcs
!  * labelled with that trigram are merged.
   *
   * 4) Pack the graph into a compact representation
   * -----------------------------------------------
***************
*** 199,211 ****
--- 211,240 ----
   *	MAX_EXPANDED_STATES - How many states we allow in expanded graph
   *	MAX_EXPANDED_ARCS - How many arcs we allow in expanded graph
   *	MAX_TRGM_COUNT - How many simple trigrams we allow to be extracted
+  *	WISH_TRGM_PENALTY - Desired sum of color trigrams penalties
   *	COLOR_COUNT_LIMIT - Maximum number of characters per color
   */
  #define MAX_EXPANDED_STATES 128
  #define MAX_EXPANDED_ARCS	1024
  #define MAX_TRGM_COUNT		256
+ #define WISH_TRGM_PENALTY	16
  #define COLOR_COUNT_LIMIT	256
  
+ /*
+  * Penalty of trigrams depending on whitespace presence. Based on analysis of
+  * real-life texts.
+  */
+ const float4 penalties[8] = {
+ 	 1.0, /* "aaa" */
+ 	 3.5, /* "aa " */
+ 	 0.0, /* "a a" (impossible) */
+ 	 0.0, /* "a  " (impossible) */
+ 	 4.2, /* " aa" */
+ 	 2.1, /* " a " */
+ 	25.0, /* "  a" */
+ 	 1.0  /* "   " (impossible) */
+ };
+ 
  /* Struct representing a single pg_wchar, converted back to multibyte form */
  typedef struct
  {
*************** typedef struct
*** 339,344 ****
--- 368,374 ----
  	ColorTrgm	ctrgm;
  	int			number;
  	int			count;
+ 	float4		penalty;
  	bool		expanded;
  	List	   *arcs;
  } ColorTrgmInfo;
*************** static TRGM *expandColorTrigrams(TrgmNFA
*** 459,465 ****
  static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3]);
  static void mergeStates(TrgmState *state1, TrgmState *state2);
  static int	colorTrgmInfoCmp(const void *p1, const void *p2);
! static int	colorTrgmInfoCountCmp(const void *p1, const void *p2);
  static TrgmPackedGraph *packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext);
  static int	packArcInfoCmp(const void *a1, const void *a2);
  
--- 489,495 ----
  static void fillTrgm(trgm *ptrgm, trgm_mb_char s[3]);
  static void mergeStates(TrgmState *state1, TrgmState *state2);
  static int	colorTrgmInfoCmp(const void *p1, const void *p2);
! static int	colorTrgmInfoPenaltyCmp(const void *p1, const void *p2);
  static TrgmPackedGraph *packGraph(TrgmNFA *trgmNFA, MemoryContext rcontext);
  static int	packArcInfoCmp(const void *a1, const void *a2);
  
*************** selectColorTrigrams(TrgmNFA *trgmNFA)
*** 1424,1429 ****
--- 1454,1460 ----
  	TrgmState  *state;
  	ColorTrgmInfo *colorTrgms;
  	int64		totalTrgmCount;
+ 	float4		totalTrgmPenalty;
  	int			number;
  
  	/* Collect color trigrams from all arcs */
*************** selectColorTrigrams(TrgmNFA *trgmNFA)
*** 1490,1528 ****
  	 * overflow an int, so we use int64 for that within this routine.
  	 */
  	totalTrgmCount = 0;
  	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
  	{
  		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
  		int			j,
! 					count = 1;
  
  		for (j = 0; j < 3; j++)
  		{
  			TrgmColor	c = trgmInfo->ctrgm.colors[j];
  
! 			if (c != COLOR_BLANK)
  				count *= trgmNFA->colorInfo[c].wordCharsCount;
  		}
  		trgmInfo->count = count;
  		totalTrgmCount += count;
  	}
  
! 	/* Sort color trigrams in descending order of simple trigram counts */
  	qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
! 		  colorTrgmInfoCountCmp);
  
  	/*
! 	 * Remove color trigrams from the graph so long as total number of simple
! 	 * trigrams exceeds MAX_TRGM_COUNT.  We prefer to remove color trigrams
! 	 * with the most associated simple trigrams, since those are the most
! 	 * promising for reducing the total number of simple trigrams.	When
! 	 * removing a color trigram we have to merge states connected by arcs
! 	 * labeled with that trigram.  It's necessary to not merge initial and
! 	 * final states, because our graph becomes useless if that happens; so we
! 	 * cannot always remove the trigram we'd prefer to.
  	 */
  	for (i = 0;
! 		 (i < trgmNFA->colorTrgmsCount) && (totalTrgmCount > MAX_TRGM_COUNT);
  		 i++)
  	{
  		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
--- 1521,1568 ----
  	 * overflow an int, so we use int64 for that within this routine.
  	 */
  	totalTrgmCount = 0;
+ 	totalTrgmPenalty = 0.0f;
  	for (i = 0; i < trgmNFA->colorTrgmsCount; i++)
  	{
  		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
  		int			j,
! 					count = 1,
! 					typeIndex = 0;
  
  		for (j = 0; j < 3; j++)
  		{
  			TrgmColor	c = trgmInfo->ctrgm.colors[j];
  
! 			typeIndex *= 2;
! 			if (c == COLOR_BLANK)
! 				typeIndex++;
! 			else
  				count *= trgmNFA->colorInfo[c].wordCharsCount;
  		}
  		trgmInfo->count = count;
+ 		trgmInfo->penalty = (float4)count * penalties[typeIndex];
  		totalTrgmCount += count;
+ 		totalTrgmPenalty += trgmInfo->penalty;
+ 
  	}
  
! 	/* Sort color trigrams in descending order of their penalties */
  	qsort(colorTrgms, trgmNFA->colorTrgmsCount, sizeof(ColorTrgmInfo),
! 		  colorTrgmInfoPenaltyCmp);
  
  	/*
! 	 * Remove color trigrams from the graph so long as total penalty of color
! 	 * trigrams exceeds WISH_TRGM_PENALTY. But if we can't reach
! 	 * WISH_TRGM_PENALTY it's OK to be in MAX_TRGM_COUNT. We prefer to remove
! 	 * color trigrams with higher penalty, since those are the most promising
! 	 * for reducing the total penalty. When removing a color trigram we have
! 	 * to merge states connected by arcs labeled with that trigram.  It's
! 	 * necessary to not merge initial and final states, because our graph
! 	 * becomes useless if that happens; so we cannot always remove the trigram
! 	 * we'd prefer to.
  	 */
  	for (i = 0;
! 		 (i < trgmNFA->colorTrgmsCount) && (totalTrgmPenalty > WISH_TRGM_PENALTY);
  		 i++)
  	{
  		ColorTrgmInfo *trgmInfo = &colorTrgms[i];
*************** selectColorTrigrams(TrgmNFA *trgmNFA)
*** 1572,1585 ****
  
  		/* Mark trigram unexpanded, and update totalTrgmCount */
  		trgmInfo->expanded = false;
  		totalTrgmCount -= trgmInfo->count;
  	}
  
! 	/* Did we succeed in fitting into MAX_TRGM_COUNT? */
  	if (totalTrgmCount > MAX_TRGM_COUNT)
  		return false;
  
! 	trgmNFA->totalTrgmCount = (int) totalTrgmCount;
  
  	/*
  	 * Sort color trigrams by colors (will be useful for bsearch in packGraph)
--- 1612,1626 ----
  
  		/* Mark trigram unexpanded, and update totalTrgmCount */
  		trgmInfo->expanded = false;
+ 		totalTrgmPenalty -= trgmInfo->penalty;
  		totalTrgmCount -= trgmInfo->count;
  	}
  
! 	/* Did we succeed in fitting into MAX_TRGM_SIZE? */
  	if (totalTrgmCount > MAX_TRGM_COUNT)
  		return false;
  
! 	trgmNFA->totalTrgmCount = totalTrgmCount;
  
  	/*
  	 * Sort color trigrams by colors (will be useful for bsearch in packGraph)
*************** colorTrgmInfoCmp(const void *p1, const v
*** 1749,1768 ****
   * their simple trigrams counts.
   */
  static int
! colorTrgmInfoCountCmp(const void *p1, const void *p2)
  {
! 	const ColorTrgmInfo *c1 = (const ColorTrgmInfo *) p1;
! 	const ColorTrgmInfo *c2 = (const ColorTrgmInfo *) p2;
  
! 	if (c1->count < c2->count)
  		return 1;
! 	else if (c1->count == c2->count)
  		return 0;
  	else
  		return -1;
  }
  
- 
  /*---------------------
   * Subroutines for packing the graph into final representation (stage 4).
   *---------------------
--- 1790,1808 ----
   * their simple trigrams counts.
   */
  static int
! colorTrgmInfoPenaltyCmp(const void *p1, const void *p2)
  {
! 	float4 penalty1 = ((const ColorTrgmInfo *) p1)->penalty;
! 	float4 penalty2 = ((const ColorTrgmInfo *) p2)->penalty;
  
! 	if (penalty1 < penalty2)
  		return 1;
! 	else if (penalty1 == penalty2)
  		return 0;
  	else
  		return -1;
  }
  
  /*---------------------
   * Subroutines for packing the graph into final representation (stage 4).
   *---------------------
