Author: rande
Date: 2010-01-22 18:40:27 +0100 (Fri, 22 Jan 2010)
New Revision: 27063
Modified:
plugins/sfSolrPlugin/branches/sf1.2/lib/util/sfLuceneCriteria.class.php
plugins/sfSolrPlugin/branches/sf1.2/test/unit/util/sfLuceneCriteriaTest.php
Log:
[sfSolrPlugin] add query analyser/guesser
Modified:
plugins/sfSolrPlugin/branches/sf1.2/lib/util/sfLuceneCriteria.class.php
===================================================================
--- plugins/sfSolrPlugin/branches/sf1.2/lib/util/sfLuceneCriteria.class.php
2010-01-22 17:28:24 UTC (rev 27062)
+++ plugins/sfSolrPlugin/branches/sf1.2/lib/util/sfLuceneCriteria.class.php
2010-01-22 17:40:27 UTC (rev 27063)
@@ -36,7 +36,8 @@
const
TYPE_NONE = '',
TYPE_AND = 'AND',
- TYPE_OR = 'OR';
+ TYPE_OR = 'OR',
+ TYPE_DEFAULT = ' '; // default use the default separator defined in the
schema.xml file
public function __construct()
@@ -176,6 +177,153 @@
return $this;
}
+ public function guessParts($phrase)
+ {
+ // initialize variable
+ $phrase = trim($phrase);
+ $phrase = str_replace('""', '', $phrase);
+ $phrase = str_replace('\'\'', '', $phrase);
+
+ $default_separators = array(' ', ',');
+ $phrase_separators = array('\'', '"');
+ $separators = array_merge($default_separators, $phrase_separators);
+ $separator = $default_separators;
+
+ $current_phrase = "";
+ $contains = 'contains';
+
+ $parts = array(
+ 'must_contains' => array(),
+ 'must_not_contains' => array(),
+ 'contains' => array(),
+ );
+
+ for($i = 0; $i < strlen($phrase); $i++)
+ {
+ $char = $phrase{$i};
+
+
+ if(strlen($current_phrase) == 0)
+ {
+
+ if($char == '-')
+ {
+ $contains = 'must_not_contains';
+ continue;
+ }
+ elseif($char == '+')
+ {
+ $contains = 'must_contains';
+ continue;
+ }
+
+ if(in_array($char, $phrase_separators))
+ {
+ $separator = array($char);
+ continue;
+ }
+ else if(in_array($char, $default_separators))
+ {
+ $separator = $default_separators;
+
+ continue;
+ }
+ }
+
+ // end of the phrase
+ if(strlen($current_phrase) > 0 && (in_array($char, $separator) ||
strlen($phrase) - 1 == $i))
+ {
+
+ if(strlen($phrase) - 1 == $i && !in_array($char, $separator))
+ {
+ $current_phrase .= $char;
+ }
+
+ $parts[$contains][] = $current_phrase;
+
+ // restore default values
+ $contains = 'contains';
+ $current_phrase = '';
+ $separator = $default_separators;
+
+ continue;
+ }
+
+ $current_phrase .= $char;
+ }
+
+ return $parts;
+ }
+ /**
+ * This method tries to parse the provided $full_phrase into a valid Solr query.
+ * It handles +/- prefixes and quote grouping.
+ *
+ * @param string $full_phrase
+ * @return sfLuceneCriteria
+ */
+ public function addPhraseGuess($full_phrase)
+ {
+
+ foreach($this->guessParts($full_phrase) as $section => $phrases)
+ {
+ if(count($phrases) == 0)
+ {
+
+ continue;
+ }
+
+ $inner_type = ($section == 'must_contains' || $section ==
'must_not_contains') ? 'AND' : 'OR';
+ $sign = ($section == 'must_contains' ? '+' : ($section ==
'must_not_contains' ? '-' : ''));
+
+ $c = new sfLuceneCriteria;
+ foreach($phrases as $phrase)
+ {
+ $c->addPhrase($phrase, $inner_type);
+ }
+
+ $this->add($sign.'('.$c->getQuery().')', 'AND', true);
+ }
+
+ return $this;
+ }
+
+ /**
+ * Parses $full_phrase (+/- prefixes, quote grouping) into a Solr query on $field.
+ *
+ * @param string $field
+ * @param string $full_phrase
+ * @param string $type operator passed to addField() (defaults to TYPE_AND)
+ * @return sfLuceneCriteria
+ */
+ public function addPhraseFieldGuess($field, $full_phrase, $type =
sfLuceneCriteria::TYPE_AND)
+ {
+
+ $main_criteria = new sfLuceneCriteria;
+ foreach($this->guessParts($full_phrase) as $section => $phrases)
+ {
+ if(count($phrases) == 0)
+ {
+
+ continue;
+ }
+
+ $inner_type = ($section == 'must_contains' || $section ==
'must_not_contains') ? 'AND' : 'OR';
+ $sign = ($section == 'must_contains' ? '+' : ($section ==
'must_not_contains' ? '-' : ''));
+
+ $c = new sfLuceneCriteria;
+ foreach($phrases as $phrase)
+ {
+ $c->addPhrase($phrase, $inner_type);
+ }
+
+ $main_criteria->add($sign.'('.$c->getQuery().')', 'AND', true);
+ }
+
+ $this->addField($field, $main_criteria, $type, true);
+
+ return $this;
+ }
+
public function addField($field, $query, $type = sfLuceneCriteria::TYPE_AND,
$force = false)
{
if($query = $this->checkQueryFragment($query, $force))
@@ -272,7 +420,7 @@
* @param string $type : OR | AND operator
* @return sfLuceneCriteria
*/
- public function addPhrase($phrase, $type = sfLuceneCriteria::TYPE_AND)
+ public function addPhrase($phrase, $type = sfLuceneCriteria::TYPE_AND,
$clever = false)
{
return $this->add(self::sanitize($phrase), $type, true);
@@ -610,7 +758,7 @@
*/
public static function sanitize($keyword)
{
-
+ $keyword = str_replace('"', '', $keyword);
return sfLuceneApacheSolrService::phrase($keyword);
}
}
Modified:
plugins/sfSolrPlugin/branches/sf1.2/test/unit/util/sfLuceneCriteriaTest.php
===================================================================
--- plugins/sfSolrPlugin/branches/sf1.2/test/unit/util/sfLuceneCriteriaTest.php
2010-01-22 17:28:24 UTC (rev 27062)
+++ plugins/sfSolrPlugin/branches/sf1.2/test/unit/util/sfLuceneCriteriaTest.php
2010-01-22 17:40:27 UTC (rev 27063)
@@ -86,7 +86,7 @@
$criteria->addSane('&" ? \unsafe');
$s = $criteria->getQuery();
-$t->cmp_ok($s, '===', '("test") AND ("&\\"" OR "?" OR "\\\\unsafe")',
'::addSane() with standard string');
+$t->cmp_ok($s, '===', '("test") AND ("&" OR "?" OR "\\\\unsafe")',
'::addSane() with standard string');
try {
$criteria->add('carl!');
@@ -168,3 +168,31 @@
$expected = 'toto AND pipop';
$t->cmp_ok($s, '===', $expected, '->add() with empty string');
+$t->diag('testing addPhraseGuess()');
+
+$s = inst()->addPhraseGuess('Thomas -"zend framework"')->getQuery();
+$expected = '-("zend framework") AND ("Thomas")';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+$s = inst()->addPhraseGuess('"Thomas" -"zend framework"')->getQuery();
+$expected = '-("zend framework") AND ("Thomas")';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+$s = inst()->addPhraseGuess('"Thomas" -.zend')->getQuery();
+$expected = '-(".zend") AND ("Thomas")';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+$s = inst()->addPhraseGuess('Thomas Rabaix +"symfony expert" -"zend framework"
+javascript -.net')->getQuery();
+$expected = '+("symfony expert" AND "javascript") AND -("zend framework" AND
".net") AND ("Thomas" OR "Rabaix")';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+$s = inst()->addPhraseGuess('Thomas Rabaix +"sym"fony expert" -"zen-d
framework" +javascript -.net')->getQuery();
+$expected = '+("sym" AND "javascript") AND -("zen-d framework" AND ".net") AND
("Thomas" OR "Rabaix" OR "fony" OR "expert")';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+$s = inst()->addPhraseFieldGuess('name', 'Thomas Rabaix +"sym"fony expert"
-"zen-d framework" +javascript -.net')->getQuery();
+$expected = 'name:((+("sym" AND "javascript") AND -("zen-d framework" AND
".net") AND ("Thomas" OR "Rabaix" OR "fony" OR "expert")))';
+$t->cmp_ok($s, '===', $expected, '->addPhraseGuess()');
+
+
+
--
You received this message because you are subscribed to the Google Groups
"symfony SVN" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/symfony-svn?hl=en.