I wrote:
> I have a few more things in mind I'm planning to look into:
> 
> - I'm not convinced that there's enough sanity checking in the input
> functions. I think you can send queries into the server using the binary
> protocol that you couldn't produce otherwise. For example, queries with
> multiple VAL nodes, with no operator to connect them. Does that wreak
> havoc in any of the functions? 

I added code to check that the query tree is well-formed. It was indeed
possible to send malformed queries in binary mode, which produced all
kinds of strange results.

> And the left-field of QueryOperator is an
> int16, what happens if you have query that exceeds
> that?

Apparently the field wraps around and you get a backend crash when it
tries to do access an array using a negative index. You need to have a
large stack (and increase max_stack_depth now that we have the check for
that in there) to reproduce it. Not sure if it's an exploitable security
vulnerability, but it might be.

I fixed this by making the left-field a uint32. There's no reason to
arbitrarily limit it to 16-bits, and it won't increase the disk/memory
footprint either now that QueryOperator and QueryOperand are separate
structs.

> And parse_query always produces trees that are in prefix notation,
> so the left-field is really redundant, but using tsqueryrecv, you could
> inject queries that are not in prefix notation; is there anything in the
> code that depends on that?

At least infix-function seems to depend on it. By checking that the tree
is well-formed, and the fact that the left operand is implicitly the
next node in the array, we know that it is in prefix notation.

> - There's many internal intermediate representations of a query:
> TSQuery, a QTNode-tree, NODE-tree (in tsquery_cleanup.c), prefix
> notation stack of QueryItems (in parser), infix-tree. Could we remove
> some of these?

I haven't looked into this yet. Might leave it for 8.4.

> - There's a lot of recursive functions. Are they all using
> check_stack_depth?

I added check_stack_depth() call to all recursive functions I found.
Some of them might have a natural limit so that you can't force
arbitrarily deep recursions, but check_stack_depth() is cheap enough
that seems best to just stick it into anything that might be a problem.

Patch attached. It's on top of the tsearch-refactor-2.patch I posted
earlier.

-- 
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery.c	2007-09-03 11:12:17.000000000 +0100
--- ./src/backend/utils/adt/tsquery.c	2007-09-05 11:59:09.000000000 +0100
***************
*** 21,26 ****
--- 21,27 ----
  #include "tsearch/ts_utils.h"
  #include "utils/memutils.h"
  #include "utils/pg_crc.h"
+ #include "nodes/bitmapset.h"
  
  
  struct TSQueryParserStateData
***************
*** 388,394 ****
   * QueryItems must be in polish (prefix) notation. 
   */
  static void
! findoprnd(QueryItem *ptr, int *pos)
  {
  	/* since this function recurses, it could be driven to stack overflow. */
  	check_stack_depth();
--- 389,395 ----
   * QueryItems must be in polish (prefix) notation. 
   */
  static void
! findoprnd(QueryItem *ptr, uint32 *pos)
  {
  	/* since this function recurses, it could be driven to stack overflow. */
  	check_stack_depth();
***************
*** 451,457 ****
  	TSQuery		query;
  	int			commonlen;
  	QueryItem  *ptr;
! 	int			pos = 0;
  	ListCell   *cell;
  
  	/* init state */
--- 452,458 ----
  	TSQuery		query;
  	int			commonlen;
  	QueryItem  *ptr;
! 	uint32		pos = 0;
  	ListCell   *cell;
  
  	/* init state */
***************
*** 792,797 ****
--- 793,799 ----
  	QueryItem  *item;
  	int			datalen = 0;
  	char	   *ptr;
+ 	Bitmapset  *parentset = NULL;
  
  	size = pq_getmsgint(buf, sizeof(uint32));
  	if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
***************
*** 814,819 ****
--- 816,830 ----
  				item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
  				item->operand.length = pq_getmsgint(buf, sizeof(int16));
  
+ 				/* Check that the weight bitmap is valid */
+ 				if (item->operand.weight < 0 || item->operand.weight > 0xF)
+ 					elog(ERROR, "invalid weight bitmap");
+ 
+ 				/* XXX: We don't check that the CRC is valid. Actually, if we
+ 				 * bothered to calculate it to verify, there would be no need
+ 				 * to transfer it.
+ 				 */
+ 
  				/*
  				 * Check that datalen doesn't grow too large. Without the
  				 * check, a malicious client could induce a buffer overflow
***************
*** 828,834 ****
  					elog(ERROR, "invalid tsquery; total operand length exceeded");
  
  				/* We can calculate distance from datalen, no need to send it
! 				 * through the wire. If we did, we would have to check that
  				 * it's valid anyway.
  				 */
  				item->operand.distance = datalen;
--- 839,845 ----
  					elog(ERROR, "invalid tsquery; total operand length exceeded");
  
  				/* We can calculate distance from datalen, no need to send it
! 				 * across the wire. If we did, we would have to check that
  				 * it's valid anyway.
  				 */
  				item->operand.distance = datalen;
***************
*** 842,864 ****
  					item->operator.oper != OP_OR &&
  					item->operator.oper != OP_AND)
  					elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
  				if(item->operator.oper != OP_NOT)
  				{
! 					item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
  					/*
! 					 * Sanity checks
  					 */
! 					if (item->operator.left <= 0 || i + item->operator.left >= size)
  						elog(ERROR, "invalid pointer to left operand");
  
! 					/* XXX: Though there's no way to construct a TSQuery that's
! 					 * not in polish notation, we don't enforce that for
! 					 * queries received from client in binary mode. Is there
! 					 * anything that relies on it?
! 					 *
! 					 * XXX: The tree could be malformed in other ways too,
! 					 * a node could have two parents, for example.
  					 */
  				}
  
  				if (i == size - 1)
--- 853,890 ----
  					item->operator.oper != OP_OR &&
  					item->operator.oper != OP_AND)
  					elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+ 
+ 				/*
+ 				 * Check that no previous operator node points to the right
+ 				 * operand. That would mean that the operand node
+ 				 * has two parents.
+ 				 */
+ 				if (bms_is_member(i + 1, parentset))
+ 					elog(ERROR, "malformed query tree");
+ 
+ 				parentset = bms_add_member(parentset, i + 1);
+ 
  				if(item->operator.oper != OP_NOT)
  				{
! 					uint32 left = (uint32) pq_getmsgint(buf, sizeof(uint32));
! 
  					/*
! 					 * Right operand is implicitly at "this+1". Don't allow
! 					 * left to point to the right operand, or to self.
  					 */
! 					if (left <= 1 || i + left >= size)
  						elog(ERROR, "invalid pointer to left operand");
  
! 					/*
! 					 * Check that no previous operator node points to the left
! 					 * operand.
  					 */
+ 					if (bms_is_member(i + left, parentset))
+ 						elog(ERROR, "malformed query tree");
+ 
+ 					parentset = bms_add_member(parentset, i + left);
+ 
+ 					item->operator.left = left;
  				}
  
  				if (i == size - 1)
***************
*** 871,876 ****
--- 897,907 ----
  		item++;
  	}
  
+ 	/* Now check that each node, except the root, has a parent. We
+ 	 * already checked above that no node has more than one parent. */
+ 	if (bms_num_members(parentset) != size - 1 && size != 0)
+ 		elog(ERROR, "malformed query tree");
+ 
  	query = (TSQuery) repalloc(query, len + datalen);
  
  	item = GETQUERY(query);
diff -c -r ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_cleanup.c ./src/backend/utils/adt/tsquery_cleanup.c
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_cleanup.c	2007-08-31 19:34:58.000000000 +0100
--- ./src/backend/utils/adt/tsquery_cleanup.c	2007-09-05 12:14:43.000000000 +0100
***************
*** 57,62 ****
--- 57,65 ----
  static void
  plainnode(PLAINTREE * state, NODE * node)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (state->cur == state->len)
  	{
  		state->len *= 2;
***************
*** 107,112 ****
--- 110,118 ----
  static void
  freetree(NODE * node)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (!node)
  		return;
  	if (node->left)
***************
*** 125,130 ****
--- 131,139 ----
  static NODE *
  clean_NOT_intree(NODE * node)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (node->valnode->type == QI_VAL)
  		return node;
  
***************
*** 203,208 ****
--- 212,220 ----
  	char		lresult = V_UNKNOWN,
  				rresult = V_UNKNOWN;
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (node->valnode->type == QI_VAL)
  		return node;
  	else 
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_rewrite.c	2007-08-31 22:08:41.000000000 +0100
--- ./src/backend/utils/adt/tsquery_rewrite.c	2007-09-05 12:18:46.000000000 +0100
***************
*** 22,27 ****
--- 22,30 ----
  static int
  addone(int *counters, int last, int total)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	counters[last]++;
  	if (counters[last] >= total)
  	{
***************
*** 173,178 ****
--- 176,184 ----
  static QTNode *
  dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	root = findeq(root, ex, subs, isfind);
  
  	if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsquery_util.c	2007-08-31 22:15:56.000000000 +0100
--- ./src/backend/utils/adt/tsquery_util.c	2007-09-05 12:21:29.000000000 +0100
***************
*** 22,27 ****
--- 22,30 ----
  {
  	QTNode	   *node = (QTNode *) palloc0(sizeof(QTNode));
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	node->valnode = in;
  
  	if (in->type == QI_OPR)
***************
*** 53,58 ****
--- 56,64 ----
  	if (!in)
  		return;
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
  		pfree(in->word);
  
***************
*** 79,84 ****
--- 85,93 ----
  int
  QTNodeCompare(QTNode * an, QTNode * bn)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (an->valnode->type != bn->valnode->type)
  		return (an->valnode->type > bn->valnode->type) ? -1 : 1;
  	
***************
*** 133,138 ****
--- 142,150 ----
  {
  	int			i;
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (in->valnode->type != QI_OPR)
  		return;
  
***************
*** 165,170 ****
--- 177,185 ----
  {
  	int			i;
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (in->valnode->type != QI_OPR)
  		return;
  
***************
*** 205,210 ****
--- 220,228 ----
  {
  	int			i;
  
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (in->valnode->type != QI_OPR)
  		return;
  
***************
*** 244,249 ****
--- 262,270 ----
  static void
  cntsize(QTNode * in, int *sumlen, int *nnode)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	*nnode += 1;
  	if (in->valnode->type == QI_OPR)
  	{
***************
*** 268,273 ****
--- 289,297 ----
  static void
  fillQT(QTN2QTState *state, QTNode *in)
  {
+ 	/* since this function recurses, it could be driven to stack overflow. */
+ 	check_stack_depth();
+ 
  	if (in->valnode->type == QI_VAL)
  	{
  		memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
***************
*** 325,331 ****
  QTNode *
  QTNCopy(QTNode *in)
  {
! 	QTNode	   *out = (QTNode *) palloc(sizeof(QTNode));
  
  	*out = *in;
  	out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
--- 349,360 ----
  QTNode *
  QTNCopy(QTNode *in)
  {
! 	QTNode	   *out;
! 
! 	/* since this function recurses, it could be driven to stack overflow. */
! 	check_stack_depth();
! 
! 	out = (QTNode *) palloc(sizeof(QTNode));
  
  	*out = *in;
  	out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
*** ../pgsql.tsearch-1/src/backend/utils/adt/tsrank.c	2007-08-31 22:24:34.000000000 +0100
--- ./src/backend/utils/adt/tsrank.c	2007-09-05 12:24:27.000000000 +0100
***************
*** 508,513 ****
--- 508,517 ----
  	int			i;
  	bool		found = false;
  
+ 	/* since this function recurses, it could be driven to stack overflow.
+ 	 * (though any decent compiler will optimize away the tail-recursion.   */
+ 	check_stack_depth();
+ 
  	reset_istrue_flag(query);
  
  	ext->p = 0x7fffffff;
*** ../pgsql.tsearch-1/src/include/tsearch/ts_type.h	2007-09-03 11:10:37.000000000 +0100
--- ./src/include/tsearch/ts_type.h	2007-09-05 12:17:02.000000000 +0100
***************
*** 159,165 ****
  typedef struct
  {
  	QueryItemType		type;	/* operand or kind of operator (ts_tokentype) */
! 	int8		weight;			/* weights of operand to search. Is this a bitmask? checkclass_str seems to think so */
  	int32	valcrc;				/* XXX: pg_crc32 would be a more appropriate data type, but we use comparisons to signed integers in the code. They would need to be changed as well. */
  
  	/* pointer to text value of operand, must correlate with WordEntry */
--- 159,172 ----
  typedef struct
  {
  	QueryItemType		type;	/* operand or kind of operator (ts_tokentype) */
! 
! 	/* Weights of operand to search. This is a bitmap:
! 	 * A: 1<<3
! 	 * B: 1<<2
! 	 * C: 1<<1
! 	 * D: 1<<0
! 	 */
! 	int8		weight;
  	int32	valcrc;				/* XXX: pg_crc32 would be a more appropriate data type, but we use comparisons to signed integers in the code. They would need to be changed as well. */
  
  	/* pointer to text value of operand, must correlate with WordEntry */
***************
*** 179,185 ****
  {
  	QueryItemType	type;
  	int8		oper;		/* see above */
! 	int16		left;		/* pointer to left operand. Right operand is
  							 * item + 1, left operand is placed
  							 * item+item->left */
  } QueryOperator;
--- 186,192 ----
  {
  	QueryItemType	type;
  	int8		oper;		/* see above */
! 	uint32		left;		/* pointer to left operand. Right operand is
  							 * item + 1, left operand is placed
  							 * item+item->left */
  } QueryOperator;
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to