http://www.mediawiki.org/wiki/Special:Code/MediaWiki/58250
Revision: 58250 Author: maxsem Date: 2009-10-28 10:29:22 +0000 (Wed, 28 Oct 2009) Log Message: ----------- SearchSqlite class, various tweaks Modified Paths: -------------- branches/sqlite/includes/AutoLoader.php branches/sqlite/includes/db/DatabaseSqlite.php branches/sqlite/maintenance/sqlite/archives/patch-fts3-searchindex.sql branches/sqlite/maintenance/sqlite/archives/patch-searchindex-no-fts.sql Added Paths: ----------- branches/sqlite/includes/search/SearchSqlite.php Modified: branches/sqlite/includes/AutoLoader.php =================================================================== --- branches/sqlite/includes/AutoLoader.php 2009-10-28 09:10:01 UTC (rev 58249) +++ branches/sqlite/includes/AutoLoader.php 2009-10-28 10:29:22 UTC (rev 58250) @@ -505,8 +505,10 @@ 'SearchResult' => 'includes/search/SearchEngine.php', 'SearchResultSet' => 'includes/search/SearchEngine.php', 'SearchResultTooMany' => 'includes/search/SearchEngine.php', + 'SearchSqlite' => 'includes/search/SearchSqlite.php', 'SearchUpdate' => 'includes/search/SearchUpdate.php', 'SearchUpdateMyISAM' => 'includes/search/SearchUpdate.php', + 'SqliteSearchResultSet' => 'includes/search/SearchSqlite.php', # includes/specials 'SpecialAllmessages' => 'includes/specials/SpecialAllmessages.php', Modified: branches/sqlite/includes/db/DatabaseSqlite.php =================================================================== --- branches/sqlite/includes/db/DatabaseSqlite.php 2009-10-28 09:10:01 UTC (rev 58249) +++ branches/sqlite/includes/db/DatabaseSqlite.php 2009-10-28 10:29:22 UTC (rev 58250) @@ -100,7 +100,7 @@ * Returns version of currently supported SQLite fulltext search module or false if none present. 
* @return String */ - function fulltextSearchEngine() { + function fulltextSearchModule() { $table = 'dummy_search_test'; $this->query( "DROP TABLE IF EXISTS $table", __METHOD__ ); if ( $this->query( "CREATE VIRTUAL TABLE $table USING FTS3(dummy_field)", __METHOD__, true ) ) { Added: branches/sqlite/includes/search/SearchSqlite.php =================================================================== --- branches/sqlite/includes/search/SearchSqlite.php (rev 0) +++ branches/sqlite/includes/search/SearchSqlite.php 2009-10-28 10:29:22 UTC (rev 58250) @@ -0,0 +1,350 @@ +<?php +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# http://www.gnu.org/copyleft/gpl.html + +/** + * @file + * @ingroup Search + */ + +/** + * Search engine hook for SQLite + * @ingroup Search + */ +class SearchSqlite extends SearchEngine { + var $strictMatching = true; + + /** @todo document */ + function __construct( $db ) { + $this->db = $db; + } + + /** + * Parse the user's query and transform it into an SQL fragment which will + * become part of a WHERE clause + */ + function parseQuery( $filteredText, $fulltext ) { + global $wgContLang; + $lc = SearchEngine::legalSearchChars(); // Minus format chars + $searchon = ''; + $this->searchTerms = array(); + + # FIXME: This doesn't handle parenthetical expressions. 
+ $m = array(); + if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + $filteredText, $m, PREG_SET_ORDER ) ) { + foreach( $m as $bits ) { + @list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits; + + if( $nonQuoted != '' ) { + $term = $nonQuoted; + $quote = ''; + } else { + $term = str_replace( '"', '', $term ); + $quote = '"'; + } + + if( $searchon !== '' ) $searchon .= ' '; + if( $this->strictMatching && ($modifier == '') ) { + // If we leave this out, boolean op defaults to OR which is rarely helpful. + $modifier = '+'; + } + + // Some languages such as Serbian store the input form in the search index, + // so we may need to search for matches in multiple writing system variants. + $convertedVariants = $wgContLang->autoConvertToAllVariants( $term ); + if( is_array( $convertedVariants ) ) { + $variants = array_unique( array_values( $convertedVariants ) ); + } else { + $variants = array( $term ); + } + + // The low-level search index does some processing on input to work + // around problems with minimum lengths and encoding in MySQL's + // fulltext engine. + // For Chinese this also inserts spaces between adjacent Han characters. + $strippedVariants = array_map( + array( $wgContLang, 'stripForSearch' ), + $variants ); + + // Some languages such as Chinese force all variants to a canonical + // form when stripping to the low-level search index, so to be sure + // let's check our variants list for unique items after stripping. + $strippedVariants = array_unique( $strippedVariants ); + + $searchon .= $modifier; + if( count( $strippedVariants) > 1 ) + $searchon .= '('; + foreach( $strippedVariants as $stripped ) { + if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) { + // Hack for Chinese: we need to toss in quotes for + // multiple-character phrases since stripForSearch() + // added spaces between them to make word breaks. + $stripped = '"' . trim( $stripped ) . 
'"'; + } + $searchon .= "$quote$stripped$quote$wildcard "; + } + if( count( $strippedVariants) > 1 ) + $searchon .= ')'; + + // Match individual terms or quoted phrase in result highlighting... + // Note that variants will be introduced in a later stage for highlighting! + $regexp = $this->regexTerm( $term, $wildcard ); + $this->searchTerms[] = $regexp; + } + wfDebug( __METHOD__ . ": Would search with '$searchon'\n" ); + wfDebug( __METHOD__ . ': Match with /' . implode( '|', $this->searchTerms ) . "/\n" ); + } else { + wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" ); + } + + $searchon = $this->db->strencode( $searchon ); + $field = $this->getIndexField( $fulltext ); + // SQLite FTS3 matches with "<column> MATCH '<query>'"; MySQL's + // MATCH() AGAINST() form is a syntax error in SQLite. + return " $field MATCH '$searchon' "; + } + + /** + * Build the regular expression fragment used to highlight one search term. + * + * @param $string String: term text; escaped here with preg_quote() + * @param $wildcard String: non-empty when the term ended in '*', in which case no trailing \b is added + * @return String + */ + function regexTerm( $string, $wildcard ) { + global $wgContLang; + + $regex = preg_quote( $string, '/' ); + if( $wgContLang->hasWordBreaks() ) { + if( $wildcard ) { + // Don't cut off the final bit! + $regex = "\b$regex"; + } else { + $regex = "\b$regex\b"; + } + } else { + // For Chinese, words may legitimately abut other words in the text literal. + // Don't add \b boundary checks... note this could cause false positives + // for latin chars. + } + return $regex; + } + + /** + * Characters allowed in a search query: the parent's set plus '"' and '*'. + * @return String + */ + public static function legalSearchChars() { + return "\"*" . parent::legalSearchChars(); + } + + /** + * Perform a full text search query and return a result set. + * + * @param $term String: raw search term + * @return SqliteSearchResultSet + */ + function searchText( $term ) { + return $this->searchInternal( $term, true ); + } + + /** + * Perform a title-only search query and return a result set.
+ * + * @param $term String: raw search term + * @return SqliteSearchResultSet + */ + function searchTitle( $term ) { + return $this->searchInternal( $term, false ); + } + + /** + * Filter the raw term, run the search query and, when + * $wgSearchMySQLTotalHits is set, a second COUNT(*) query for the total. + * + * @param $term String: raw search term + * @param $fulltext Boolean: true to search page text, false for titles only + * @return SqliteSearchResultSet + */ + protected function searchInternal( $term, $fulltext ) { + global $wgSearchMySQLTotalHits; + + $filteredTerm = $this->filter( $term ); + $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) ); + + $total = null; + if( $wgSearchMySQLTotalHits ) { + $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) ); + $row = $totalResult->fetchObject(); + if( $row ) { + $total = intval( $row->c ); + } + $totalResult->free(); + } + + return new SqliteSearchResultSet( $resultSet, $this->searchTerms, $total ); + } + + + /** + * Return a partial WHERE clause to exclude redirects, if so set + * @return String + */ + function queryRedirect() { + if( $this->showRedirects ) { + return ''; + } else { + return 'AND page_is_redirect=0'; + } + } + + /** + * Return a partial WHERE clause to limit the search to the given namespaces. + * A null namespace list means no restriction; an empty list restricts to namespace 0. + * @return String + */ + function queryNamespaces() { + if( is_null($this->namespaces) ) + return ''; # search all + if ( !count( $this->namespaces ) ) { + $namespaces = '0'; + } else { + $namespaces = $this->db->makeList( $this->namespaces ); + } + return 'AND page_namespace IN (' . $namespaces . ')'; + } + + /** + * Return a LIMIT clause to limit results on the query. + * @return String + */ + function queryLimit() { + return $this->db->limitResult( '', $this->limit, $this->offset ); + } + + /** + * Return the ranking clause of the query. This implementation adds no + * ranking and always returns an empty string; both parameters are unused. + * @param $filteredTerm String + * @param $fulltext Boolean + * @return String + */ + function queryRanking( $filteredTerm, $fulltext ) { + return ''; + } + + /** + * Construct the full SQL query to do the search.
+ * The guts should be constructed in queryMain() + * @param $filteredTerm String + * @param $fulltext Boolean + * @return String + */ + function getQuery( $filteredTerm, $fulltext ) { + return $this->queryMain( $filteredTerm, $fulltext ) . ' ' . + $this->queryRedirect() . ' ' . + $this->queryNamespaces() . ' ' . + $this->queryRanking( $filteredTerm, $fulltext ) . ' ' . + $this->queryLimit(); + } + + /** + * Picks which field to index on, depending on what type of query. + * @param $fulltext Boolean + * @return String + */ + function getIndexField( $fulltext ) { + return $fulltext ? 'si_text' : 'si_title'; + } + + /** + * Get the base part of the search query: a join of page and + * searchindex on page_id = searchindex.rowid, restricted by the + * match clause built by parseQuery(). + * + * @param $filteredTerm String + * @param $fulltext Boolean + * @return String + */ + function queryMain( $filteredTerm, $fulltext ) { + $match = $this->parseQuery( $filteredTerm, $fulltext ); + $page = $this->db->tableName( 'page' ); + $searchindex = $this->db->tableName( 'searchindex' ); + // Double quotes are required so that $searchindex is interpolated. + return "SELECT $searchindex.rowid, page_namespace, page_title " . + "FROM $page,$searchindex " . + "WHERE page_id=$searchindex.rowid AND $match"; + } + + /** + * Construct the SQL query that counts all matching rows (no LIMIT), + * using the same redirect and namespace filters as getQuery(). + * + * @param $filteredTerm String + * @param $fulltext Boolean + * @return String + */ + function getCountQuery( $filteredTerm, $fulltext ) { + $match = $this->parseQuery( $filteredTerm, $fulltext ); + $page = $this->db->tableName( 'page' ); + $searchindex = $this->db->tableName( 'searchindex' ); + return "SELECT COUNT(*) AS c " . + "FROM $page,$searchindex " . + "WHERE page_id=$searchindex.rowid AND $match" . ' ' . + $this->queryRedirect() . ' ' . + $this->queryNamespaces(); + } + + /** + * Create or update the search index record for the given page. + * Title and text should be pre-processed.
+ * + * @param $id Integer + * @param $title String + * @param $text String + */ + function update( $id, $title, $text ) { + $dbw = wfGetDB( DB_MASTER ); + $dbw->replace( 'searchindex', + array( 'rowid' ), + array( + 'rowid' => $id, + 'si_title' => $title, + 'si_text' => $text + ), __METHOD__ ); + } + + /** + * Update a search index record's title only. + * Title should be pre-processed. + * + * @param $id Integer + * @param $title String + */ + function updateTitle( $id, $title ) { + $dbw = wfGetDB( DB_MASTER ); + + $dbw->update( 'searchindex', + array( 'si_title' => $title ), + array( 'rowid' => $id ), + __METHOD__, + array( $dbw->lowPriorityOption() ) ); + } +} + +/** + * Result set returned by SearchSqlite: wraps the database result of the + * search query together with the highlight terms and optional total count. + * @ingroup Search + */ +class SqliteSearchResultSet extends SearchResultSet { + /** + * @param $resultSet database result of the search query + * @param $terms Array: regex fragments for the search terms + * @param $totalHits Integer or null: total number of matches, if counted + */ + function __construct( $resultSet, $terms, $totalHits=null ) { + $this->mResultSet = $resultSet; + $this->mTerms = $terms; + $this->mTotalHits = $totalHits; + } + + function termMatches() { + return $this->mTerms; + } + + function numRows() { + return $this->mResultSet->numRows(); + } + + /** + * Fetch the next row as a SearchResult, or false when no rows remain. + */ + function next() { + $row = $this->mResultSet->fetchObject(); + if( $row === false ) { + return false; + } else { + return new SearchResult( $row ); + } + } + + function free() { + $this->mResultSet->free(); + } + + + function getTotalHits() { + return $this->mTotalHits; + } +} \ No newline at end of file Property changes on: branches/sqlite/includes/search/SearchSqlite.php ___________________________________________________________________ Added: svn:eol-style + native Modified: branches/sqlite/maintenance/sqlite/archives/patch-fts3-searchindex.sql =================================================================== --- branches/sqlite/maintenance/sqlite/archives/patch-fts3-searchindex.sql 2009-10-28 09:10:01 UTC (rev 58249) +++ branches/sqlite/maintenance/sqlite/archives/patch-fts3-searchindex.sql 2009-10-28 10:29:22 UTC (rev 58250) @@ -5,7 +5,8 @@ DROP TABLE IF EXISTS /*_*/searchindex; CREATE VIRTUAL TABLE /*_*/searchindex
USING FTS3( -- Key to page_id - si_page INTEGER NOT NULL, + -- Disabled, instead we use the built-in rowid column + --si_page INTEGER NOT NULL, -- Munged version of title si_title, @@ -13,3 +14,5 @@ -- Munged version of body text si_text ); + +INSERT INTO /*_*/updatelog VALUES ('fts3'); \ No newline at end of file Modified: branches/sqlite/maintenance/sqlite/archives/patch-searchindex-no-fts.sql =================================================================== --- branches/sqlite/maintenance/sqlite/archives/patch-searchindex-no-fts.sql 2009-10-28 09:10:01 UTC (rev 58249) +++ branches/sqlite/maintenance/sqlite/archives/patch-searchindex-no-fts.sql 2009-10-28 10:29:22 UTC (rev 58250) @@ -4,13 +4,21 @@ -- to environment without it. DROP TABLE IF EXISTS /*_*/searchindex; + +-- These are pieces of FTS3-enabled searchindex +DROP TABLE IF EXISTS /*_*/searchindex_content; +DROP TABLE IF EXISTS /*_*/searchindex_segdir; +DROP TABLE IF EXISTS /*_*/searchindex_segments; + CREATE TABLE /*_*/searchindex ( -- Key to page_id - si_page INTEGER NOT NULL, + -- si_page INTEGER NOT NULL, -- Munged version of title si_title TEXT, -- Munged version of body text si_text TEXT -); \ No newline at end of file +); + +DELETE FROM /*_*/updatelog WHERE ul_key='fts3'; \ No newline at end of file _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs