*** parser.cc.orig	Thu Jul 20 11:35:01 2000
--- parser.cc	Mon Aug 21 13:37:31 2000
*************** Parser::expr(int output)
*** 97,103 ****
--- 97,105 ----
  	    term(output);
  	    if (output)
  	    {
+ 		if(debug) cerr << "or--" << endl;
  		perform_or();
+ 		if(debug) cerr << "stack:" << stack.Size() << endl;
  	    }
  	}
  	else
*************** Parser::expr(int output)
*** 113,133 ****
  void
  Parser::term(int output)
  {
-     int	isand;
      
      factor(output);
      while (1)
      {
! 	if ((isand = match('&')) || match('!'))
  	{
! 	    factor(output);
! 	    if (output)
! 	    {
! 		perform_and(isand);
! 	    }
  	}
  	else
! 	    break;
      }
  }
  
--- 115,149 ----
  void
  Parser::term(int output)
  {
      
      factor(output);
+ 	if(debug) cerr << "term:factor" << endl;
      while (1)
      {
! 	if(match('&'))
  	{
! 		factor(output);
! 		if(output)
! 		{
! 			if(debug) cerr << "and--" << endl;
! 			perform_and();
! 			if(debug) cerr << "stack:" << stack.Size() << endl;
! 		}
! 	}
! 	else if(match('!'))
! 	{
! 		factor(output);
! 		if(output)
! 		{
! 			if(debug) cerr << "not--" << endl;
! 			perform_not();
! 			if(debug) cerr << "stack:" << stack.Size() << endl;
! 		}
  	}
  	else
! 	{
! 		break;
! 	}
      }
  }
  
*************** Parser::term(int output)
*** 135,143 ****
  void
  Parser::factor(int output)
  {
!     phrase(output);
! 
!     if (match('('))
      {
  	expr(output);
  	if (match(')'))
--- 151,161 ----
  void
  Parser::factor(int output)
  {
!     if(match('"'))
!     {
! 	phrase(output);
!     }
!     else if (match('('))
      {
  	expr(output);
  	if (match(')'))
*************** Parser::factor(int output)
*** 157,175 ****
  	}
  	lookahead = lexan();
      }
!     //    else
!     //    {
!     //	setError("a search word");
!     //    }
  }
  
  //*****************************************************************************
  void
  Parser::phrase(int output)
  {
!   if (match('"'))
!     {
!       List *wordList = new List;
        double weight = 1.0;
  
        while (1)
--- 175,191 ----
  	}
  	lookahead = lexan();
      }
!     else
!     {
!     	setError("a search word, a quoted phrase, a boolean expression between ()");
!     }
  }
  
  //*****************************************************************************
  void
  Parser::phrase(int output)
  {
!       List *wordList = 0;
        double weight = 1.0;
  
        while (1)
*************** Parser::phrase(int output)
*** 177,197 ****
  	  if (match('"'))
  	    {
  	      if (output)
  		score(wordList, weight);
  	      break;
  	    }
  	  else if (lookahead == WORD)
  	    {
  	      weight *= current->weight;
  	      if (output)
! 		perform_phrase(*wordList);
  	      
  	      lookahead = lexan();
  	    }
  
  	} // end while
!       delete wordList;
!     } // end if
  }
  
  //*****************************************************************************
--- 193,221 ----
  	  if (match('"'))
  	    {
  	      if (output)
+ 	      {
+                 if(!wordList) wordList = new List;
+ 		if(debug) cerr << "scoring phrase" << endl;
  		score(wordList, weight);
+ 	      }
  	      break;
  	    }
  	  else if (lookahead == WORD)
  	    {
  	      weight *= current->weight;
  	      if (output)
! 		perform_phrase(wordList);
  	      
  	      lookahead = lexan();
  	    }
+           else if (lookahead == DONE)
+            {
+              setError("missing quote");
+ 	     break;
+            }
  
  	} // end while
! 	if(wordList) delete wordList;
  }
  
  //*****************************************************************************
*************** Parser::perform_push()
*** 244,254 ****
--- 268,284 ----
      String	temp = current->word.get();
      char	*p;
  
+     if(debug)
+ 	cerr << "perform_push @"<< stack.Size() << ": " << temp << endl;
      if (current->isIgnore)
      {
+ 	if(debug) cerr << "ignore: " << temp << " @" << stack.Size() << endl;
  	//
  	// This word needs to be ignored.  Make it so.
  	//
+     	ResultList	*list = new ResultList;
+ 	list->isIgnore = 1;
+     	stack.push(list);
  	return;
      }
  
*************** Parser::perform_push()
*** 264,282 ****
  
  //*****************************************************************************
  void
! Parser::perform_phrase(List &oldWords)
  {
      static int	maximum_word_length = config.Value("maximum_word_length", 12);
      String	temp = current->word.get();
      char	*p;
      List	*newWords = 0;
      HtWordReference *oldWord, *newWord;
  
      if (current->isIgnore)
      {
  	//
  	// This word needs to be ignored.  Make it so.
  	//
  	return;
      }
  
--- 294,324 ----
  
  //*****************************************************************************
  void
! Parser::perform_phrase(List * &oldWords)
  {
      static int	maximum_word_length = config.Value("maximum_word_length", 12);
      String	temp = current->word.get();
      char	*p;
      List	*newWords = 0;
      HtWordReference *oldWord, *newWord;
+     static int	hole = 0;
  
+     // if the phrase matched so far already has no hits, no later word can rescue it
+     if(oldWords && oldWords->Count() == 0)
+     {
+ 	if(debug) cerr << "phrase not found, skip" << endl;
+ 	return;
+     }
+ 
+     if(debug) cerr << "phrase current: " << temp << endl;
      if (current->isIgnore)
      {
  	//
  	// This word needs to be ignored.  Make it so.
  	//
+ 	if(debug) cerr << "ignoring: " << temp << endl;
+ 	// increase the count of ignored words between 'good' words
+ 	hole++;
  	return;
      }
  
*************** Parser::perform_phrase(List &oldWords)
*** 286,312 ****
  	p[maximum_word_length] = '\0';
  
      newWords = words[p];
  
      // If we don't have a prior list of words, we want this one...
!     if (oldWords.Count() == 0)
        {
  	newWords->Start_Get();
  	while ((newWord = (HtWordReference *) newWords->Get_Next()))
! 	  oldWords.Add(newWord);
  	return;
        }
  
      // OK, now we have a previous list in wordList and a new list
      List	*results = new List;
  
!     oldWords.Start_Get();
!     while ((oldWord = (HtWordReference *) oldWords.Get_Next()))
        {
  	newWords->Start_Get();
  	while ((newWord = (HtWordReference *) newWords->Get_Next()))
  	  {
  	    if (oldWord->DocID() == newWord->DocID())
! 	      if ((oldWord->Location() + 1) == newWord->Location())
  		{
  		  HtWordReference *result = new HtWordReference(*oldWord);
  
--- 328,362 ----
  	p[maximum_word_length] = '\0';
  
      newWords = words[p];
+     if(debug) cerr << "new words count: " << newWords->Count() << endl;
  
      // If we don't have a prior list of words, we want this one...
!     if (!oldWords)
        {
+         oldWords = new List;
+ 	if(debug) cerr << "phrase adding first: " << temp << endl;
  	newWords->Start_Get();
  	while ((newWord = (HtWordReference *) newWords->Get_Next()))
! 	{
! 	  oldWords->Add(newWord);
! 	}
! 	if(debug) cerr << "old words count: " << oldWords->Count() << endl;
! 	// reset the ignored word count since last word was not ignored
! 	hole = 0;
  	return;
        }
  
      // OK, now we have a previous list in wordList and a new list
      List	*results = new List;
  
!     oldWords->Start_Get();
!     while ((oldWord = (HtWordReference *) oldWords->Get_Next()))
        {
  	newWords->Start_Get();
  	while ((newWord = (HtWordReference *) newWords->Get_Next()))
  	  {
  	    if (oldWord->DocID() == newWord->DocID())
! 	      if ((oldWord->Location() + 1 + hole) == newWord->Location())
  		{
  		  HtWordReference *result = new HtWordReference(*oldWord);
  
*************** Parser::perform_phrase(List &oldWords)
*** 318,339 ****
  	  }
        }
  
!     oldWords.Destroy();
      results->Start_Get();
      while ((newWord = (HtWordReference *) results->Get_Next()))
!       oldWords.Add(newWord);
      results->Release();
      delete results;
  
      newWords->Destroy();
      delete newWords;
  }
  
  //*****************************************************************************
  void
  Parser::score(List *wordList, double weight)
  {
-     ResultList	*list = new ResultList;
      DocMatch	*dm;
      HtWordReference *wr;
      static double text_factor = config.Double("text_factor", 1);
--- 368,396 ----
  	  }
        }
  
!     if(debug) cerr << "old words count: " << oldWords->Count() << endl;
!     if(debug) cerr << "results count: " << results->Count() << endl;
!     oldWords->Destroy();
      results->Start_Get();
      while ((newWord = (HtWordReference *) results->Get_Next()))
!     {
!       oldWords->Add(newWord);
!     }
!     if(debug) cerr << "old words count: " << oldWords->Count() << endl;
      results->Release();
      delete results;
  
      newWords->Destroy();
      delete newWords;
+ 
+     // reset the ignored word count since last word was not ignored
+     hole = 0;
  }
  
  //*****************************************************************************
  void
  Parser::score(List *wordList, double weight)
  {
      DocMatch	*dm;
      HtWordReference *wr;
      static double text_factor = config.Double("text_factor", 1);
*************** Parser::score(List *wordList, double wei
*** 348,362 ****
      int		  docanchor;
      int		  word_count;
  
-     stack.push(list);
  
      if (!wordList || wordList->Count() == 0)
        {
! 	// We can't score an empty list, so this should be ignored...
! 	list->isIgnore = 1;
  	return;
        }
  
      // We're now guaranteed to have a non-empty list
      // We'll use the number of occurences of this word for scoring
      word_count = wordList->Count();
--- 405,423 ----
      int		  docanchor;
      int		  word_count;
  
  
      if (!wordList || wordList->Count() == 0)
        {
! 	// We can't score an empty list, so push a null pointer...
! 	if(debug) cerr << "score: empty list, push 0 @" << stack.Size() << endl;
! 
! 	stack.push(0);
  	return;
        }
  
+     ResultList	*list = new ResultList;
+     if(debug) cerr << "score: push @" << stack.Size() << endl;
+     stack.push(list);
      // We're now guaranteed to have a non-empty list
      // We'll use the number of occurences of this word for scoring
      word_count = wordList->Count();
*************** Parser::score(List *wordList, double wei
*** 401,460 ****
  //*****************************************************************************
  // The top two entries in the stack need to be ANDed together.
  //
  void
! Parser::perform_and(int isand)
  {
      ResultList		*l1 = (ResultList *) stack.pop();
      ResultList		*l2 = (ResultList *) stack.pop();
-     ResultList		*result = new ResultList;
      int			i;
      DocMatch		*dm, *dm2, *dm3;
      HtVector		*elements;
  
!     //
!     // If either of the arguments is not present, we will use the other as
!     // the result.
!     //
!     if (!l1 && l2)
!     {
! 	stack.push(l2);
! 	return;
!     }
!     else if (l1 && !l2)
      {
! 	stack.push(l1);
  	return;
      }
!     else if (!l1 && !l2)
!     {
! 	stack.push(result);
! 	return;
!     }
!     
      //
      // If either of the arguments is set to be ignored, we will use the
      // other as the result.
!     //
!     if (l1->isIgnore)
      {
  	stack.push(l2);
  	delete l1;
  	return;
      }
      else if (l2->isIgnore)
      {
! 	stack.push(isand ? l1 : result);
  	delete l2;
  	return;
      }
      
      stack.push(result);
      elements = l2->elements();
      for (i = 0; i < elements->Count(); i++)
      {
  	dm = (DocMatch *) (*elements)[i];
  	dm2 = l1->find(dm->id);
! 	if (dm2 ? isand : (isand == 0))
  	{
  	    //
  	    // Duplicate document.  We just need to add the scored together.
--- 462,536 ----
  //*****************************************************************************
  // The top two entries in the stack need to be ANDed together.
  //
+ //	a	b	a and b
+ //	0	0	0
+ //	0	1	0
+ //	0	x	0
+ //	1	0	0
+ //	1	1	intersect(a,b)
+ //	1	x	a
+ //	x	0	0
+ //	x	1	b
+ //	x	x	x
+ //
  void
! Parser::perform_and()
  {
      ResultList		*l1 = (ResultList *) stack.pop();
      ResultList		*l2 = (ResultList *) stack.pop();
      int			i;
      DocMatch		*dm, *dm2, *dm3;
      HtVector		*elements;
  
!     if(!(l2 && l1))
      {
! 	if(debug) cerr << "and: at least one empty operator, pushing 0 @" << stack.Size() << endl;
! 	stack.push(0);
! 	if(l1) delete l1;
! 	if(l2) delete l2;
  	return;
      }
! 
      //
      // If either of the arguments is set to be ignored, we will use the
      // other as the result.
!     // remember l2 and l1, l2 not l1
! 
!     if (l1->isIgnore && l2->isIgnore)
      {
+ 	if(debug) cerr << "and: ignoring all, pushing ignored list @" << stack.Size() << endl;
+ 	// x and x = x: reuse l1 (already an ignored list) and return, so the freed operand is never touched below
+ 	stack.push(l1);
+ 	delete l2;
+ 	return;
+     }
+     else if (l1->isIgnore)
+     {
+ 	if(debug) cerr << "and: ignoring l1, pushing l2 @" << stack.Size() << endl;
  	stack.push(l2);
  	delete l1;
  	return;
      }
      else if (l2->isIgnore)
      {
! 	if(debug) cerr << "and: ignoring l2, pushing l1 @" << stack.Size() <<  endl;
! 	stack.push(l1);
  	delete l2;
  	return;
      }
      
+     ResultList		*result = new ResultList;
      stack.push(result);
      elements = l2->elements();
+ 
+     if(debug)
+ 	cerr << "perform and: " << elements->Count() << " " << l1->elements()->Count() << " ";
+ 
      for (i = 0; i < elements->Count(); i++)
      {
  	dm = (DocMatch *) (*elements)[i];
  	dm2 = l1->find(dm->id);
! 	if (dm2)
  	{
  	    //
  	    // Duplicate document.  We just need to add the scored together.
*************** Parser::perform_and(int isand)
*** 468,473 ****
--- 544,620 ----
  	    result->add(dm3);
  	}
      }
+     if(debug)
+ 	cerr << result->elements()->Count() << endl;
+ 
+     elements->Release();
+     delete elements;
+     delete l1;
+     delete l2;
+ }
+ 
+ //	a	b	a not b
+ //	0	0	0
+ //	0	1	0
+ //	0	x	0
+ //	1	0	a
+ //	1	1	intersect(a,not b)
+ //	1	x	a
+ //	x	0	x
+ //	x	1	x
+ //	x	x	x
+ void
+ Parser::perform_not()
+ {
+     ResultList		*l1 = (ResultList *) stack.pop();
+     ResultList		*l2 = (ResultList *) stack.pop();
+     int			i;
+     DocMatch		*dm, *dm2, *dm3;
+     HtVector		*elements;
+ 
+ 
+     if(!l2)
+     {
+ 	if(debug) cerr << "not: no positive term, pushing 0 @" << stack.Size() << endl;
+ 	stack.push(0);
+ 	if(l1) delete l1;
+ 	return;
+     }
+     if(!l1 || l1->isIgnore || l2->isIgnore)
+     {
+ 	if(debug) cerr << "not: no negative term, pushing positive @" << stack.Size() << endl;
+         stack.push(l2);
+ 	if(l1) delete l1;
+         return;
+     }
+ 
+     ResultList		*result = new ResultList;
+     if(debug) cerr << "not: pushing result @" << stack.Size() << endl;
+     stack.push(result);
+     elements = l2->elements();
+ 
+     if(debug)
+ 	cerr << "perform not: " << elements->Count() << " " << l1->elements()->Count() << " ";
+ 
+     for (i = 0; i < elements->Count(); i++)
+     {
+ 	dm = (DocMatch *) (*elements)[i];
+ 	dm2 = l1->find(dm->id);
+ 	if (!dm2)
+ 	{
+ 	    //
+ 	    // Document is absent from the negated list (l1): keep it by copying its match.
+ 	    //
+ 	    dm3 = new DocMatch;
+ 	    dm3->score = dm->score;
+ 	    dm3->id = dm->id;
+ 	    dm3->anchor = dm->anchor;
+ 	    result->add(dm3);
+ 	}
+     }
+     if(debug)
+ 	cerr << result->elements()->Count() << endl;
+ 
      elements->Release();
      delete elements;
      delete l1;
*************** Parser::perform_or()
*** 492,507 ****
      //
      if (!l1 && result)
      {
! 	return;
      }
      else if (l1 && !result)
      {
  	stack.push(l1);
  	return;
      }
      else if (!l1 & !result)
      {
! 	stack.push(new ResultList);
  	return;
      }
      
--- 639,659 ----
      //
      if (!l1 && result)
      {
! 	if(debug) cerr << "or: no 2nd operand" << endl;
! 	return; // result in top of stack
      }
      else if (l1 && !result)
      {
+ 	if(debug) cerr << "or: no 1st operand" << endl;
+ 	stack.pop();
  	stack.push(l1);
  	return;
      }
      else if (!l1 & !result)
      {
! 	if(debug) cerr << "or: no operands" << endl;
! 	stack.pop();
! 	stack.push(0); // empty result
  	return;
      }
      
*************** Parser::perform_or()
*** 523,528 ****
--- 675,682 ----
      }
      
      elements = l1->elements();
+     if(debug)
+ 	cerr << "perform or: " << elements->Count() << " " << result->elements()->Count() << " ";
      for (i = 0; i < elements->Count(); i++)
      {
  	dm = (DocMatch *) (*elements)[i];
*************** Parser::perform_or()
*** 545,550 ****
--- 699,706 ----
  	    result->add(dm2);
  	}
      }
+     if(debug)
+ 	cerr << result->elements()->Count() << endl;
      elements->Release();
      delete elements;
      delete l1;
*************** Parser::parse(List *tokenList, ResultLis
*** 562,570 ****
      ResultList	*result = (ResultList *) stack.pop();
      if (!result)  // Ouch!
        {
! 	valid = 0;
  	error = 0;
! 	error << "Expected to have something to parse!";
  	return;
        }
      HtVector	*elements = result->elements();
--- 718,726 ----
      ResultList	*result = (ResultList *) stack.pop();
      if (!result)  // Ouch!
        {
! //	valid = 0;
  	error = 0;
! //	error << "Expected to have something to parse!";
  	return;
        }
      HtVector	*elements = result->elements();
