Revision: 45758
Author:   rainman
Date:     2009-01-14 22:33:55 +0000 (Wed, 14 Jan 2009)

Log Message:
-----------
Rewrite replacePrefixes:
* build a regexp of the namespace names to replace, then do a search/replace
* namespaces with spaces in their names are now rewritten correctly
* remove some arcane syntax no one is using

Modified Paths:
--------------
    trunk/extensions/MWSearch/MWSearch_body.php

Modified: trunk/extensions/MWSearch/MWSearch_body.php
===================================================================
--- trunk/extensions/MWSearch/MWSearch_body.php 2009-01-14 22:31:25 UTC (rev 
45757)
+++ trunk/extensions/MWSearch/MWSearch_body.php 2009-01-14 22:33:55 UTC (rev 
45758)
@@ -42,7 +42,6 @@
         * 
         * 1) rewrite namespaces into standardized form 
         * e.g. image:clouds -> [6]:clouds
-        * e.g. help,wp:npov -> [12,4]:npov
         * 
         * 2) rewrite localizations of "search everything" keyword
         * e.g. alle:heidegger -> all:heidegger
@@ -86,79 +85,53 @@
                        return trim($ret);
                }
                
-               for($i = 0 ; $i < $qlen ; $i++){
-                       $c = $query[$i];
-
-                       // ignore chars in quotes
-                       if($inquotes && $c!='"'); 
-                       // check if $c is valid prefix character
-                       else if(($c >= 'a' && $c <= 'z') ||
-                                ($c >= 'A' && $c <= 'Z') ||
-                                $c == '_' || $c == '-' || $c ==','){
-                               if($len == 0){
-                                       $start = $i; // begin of token
-                                       $len = 1;
-                               } else
-                                       $len++; 
-                       // check for utf-8 chars
-                       } else if(($c >= "\xc0" && $c <= "\xff")){ 
-                               $utf8len = 1;
-                               for($j = $i+1; $j < $qlen; $j++){ // fetch 
extra utf-8 bytes
-                                       if($query[$j] >= "\x80" && $query[$j] 
<= "\xbf")
-                                               $utf8len++;
-                                       else
-                                               break;
-                               }
-                               if($len == 0){
-                                       $start = $i;
-                                       $len = $utf8len;
-                               } else
-                                       $len += $utf8len;
-                               $i = $j - 1;  // we consumed the chars
-                       // check for end of prefix (i.e. semicolon)
-                       } else if($c == ':' && $len !=0){
-                               $rewrite = array(); // here we collect 
namespaces 
-                               $prefixes = 
explode(',',substr($query,$start,$len));
-                               // iterate thru comma-separated list of prefixes
-                               foreach($prefixes as $prefix){
-                                       $index = 
$wgContLang->getNsIndex($prefix);
-                                       
-                                       // check for special prefixes 
all/incategory
-                                       if($prefix == $allkeyword){
-                                               $rewrite = 'all';
-                                               break;
-                                       // check for localized names of 
namespaces
-                                       } else if($index !== false)
-                                               $rewrite[] = $index;            
                        
-                               }
-                               $translated = null;
-                               if($rewrite === 'all')
-                                       $translated = $rewrite;
-                               else if(count($rewrite) != 0)
-                                       $translated = 
'['.implode(',',array_unique($rewrite)).']';
-
-                               if(isset($translated)){
-                                       // append text before the prefix, and 
then the prefix
-                                       $rewritten .= 
substr($query,$rindex,$start-$rindex);
-                                       $rewritten .= $translated . ':';
-                                       $rindex = $i+1;
-                               }
-                               
-                               $len = 0;
-                       } else{ // end of token
-                               if($c == '"') // get in/out of quotes
-                                       $inquotes = !$inquotes;
-                               
-                               $len = 0;
+               global $wgCanonicalNamespaceNames, $wgNamespaceAliases;
+               $nsNamesRaw = array_merge($wgContLang->getNamespaces(), 
$wgCanonicalNamespaceNames, 
+                       array_keys( array_merge($wgNamespaceAliases, 
$wgContLang->namespaceAliases) ) );
+                       
+               # add all namespace names w/o spaces
+               $nsNames = array();
+               foreach($nsNamesRaw as $ns){
+                       if( $ns != ''){
+                               $nsNames[] = $ns;
+                               $nsNames[] = str_replace('_',' ',$ns);
                        }
-                               
                }
-               // add rest of the original query that doesn't need rewritting
-               $rewritten .= substr($query,$rindex,$qlen-$rindex);
+
+               $regexp = implode('|',array_unique( $nsNames ));
+               
+               # rewrite the query by replacing valid namespace names
+               $parts = preg_split('/(")/',$query,-1,PREG_SPLIT_DELIM_CAPTURE);
+               $inquotes = false;
+               $rewritten = '';
+               foreach($parts as $part){
+                       if( $part == '"'){ # stuff in quote doesnt get rewritten
+                               $rewritten .= $part;
+                               $inquotes = !$inquotes;
+                       } elseif( $inquotes ){
+                               $rewritten .= $part;                            
+                       } else{
+                               # replace namespaces
+                               $r = 
preg_replace_callback('/('.$regexp.'):/i',array($this,'replaceNamespace'),$part);
+                               # replace to backend all: notation
+                               $rewritten .= str_replace($allkeyword.':', 
'all:', $r);
+                       }
+               }               
                wfProfileOut($fname);
                return $rewritten;
        }
        
+       /** preg_replace_callback() handler: rewrites a matched namespace prefix to internal notation, e.g. 
User: -> [2]: ($matches[0] is the full match including the colon, $matches[1] the namespace name) */ 
+       function replaceNamespace($matches){
+               global $wgContLang;
+               $inx = $wgContLang->getNsIndex(str_replace(' ', '_', 
$matches[1])); # look the name up with its spaces turned into underscores
+               if ($inx === false) # lookup gave no index: hand the matched text back untouched
+                       return $matches[0];
+               else # otherwise emit the index in brackets, followed by the colon
+                       return "[$inx]:";
+               
+       }
+       
        function acceptListRedirects() {
                return false;
        }



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to