Transliterator.php

conrad Sat, 26 Sep 2009 09:53:12 -0700

http://www.mediawiki.org/wiki/Special:Code/MediaWiki/56951


Revision: 56951
Author:   conrad
Date:     2009-09-26 12:47:41 +0000 (Sat, 26 Sep 2009)

Log Message:
-----------
Use Sanitizer::decodeCharReferences instead of html_entity_decode
Remove trailing whitespace

Modified Paths:
--------------
    trunk/extensions/Transliterator/Transliterator.php

Modified: trunk/extensions/Transliterator/Transliterator.php
===================================================================
--- trunk/extensions/Transliterator/Transliterator.php  2009-09-26 11:54:07 UTC 
(rev 56950)
+++ trunk/extensions/Transliterator/Transliterator.php  2009-09-26 12:47:41 UTC 
(rev 56951)
@@ -5,7 +5,6 @@
  * @subpackage Extensions
  *
  * @link http://www.mediawiki.org/wiki/Extension:Transliterator Documentation
- * @link http://en.wiktionary.org/wiki/User:Conrad.Irwin/Transliterator.php 
Original
  *
  * @author Conrad Irwin
  * @modifier Purodha Blissenbach
@@ -30,17 +29,17 @@
 
 /**
     Extension:Transliterator Copyright (C) 2009 Conrad.Irwin
- 
+
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.
- 
+
     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.
- 
+
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
USA
@@ -73,8 +72,8 @@
 
 class ExtTransliterator {
 
-    const FIRST = "\x1F"; // A character that will be inserted in places where 
the ^ and $ should match
-    const LAST = "\x1E"; // A character that will be inserted in places where 
the ^ and $ should match
+    const FIRST = "\x1F"; // A character that will be appended when ^ should 
match at the start
+    const LAST = "\x1E"; // A character that will be appended when $ should 
match at the end
     const CACHE_PREFIX = "extTransliterator.2:"; // The prefix to use for 
cache items (the number should be incremented when the map format changes)
     var $mPages = null;  // An Array of "transliterator:$mapname" => The 
database row for that template.
     var $mMaps = array();// An Array of "$mapname" => The map parsed from that 
page.
@@ -91,7 +90,7 @@
         $i = 1;
         while ( $i < count( $split ) ) {
             if ( isset( $utfCombiningClass[$split[$i]] ) ) {
-               $split[$i - 1] .= $split[$i]; 
+               $split[$i - 1] .= $split[$i];
                unset( $split[$i] );
 
             } else {
@@ -112,7 +111,7 @@
     }
 
     /**
-     * Given a codepoints or letters array returns a list that contains 1 for 
every 
+     * Given a codepoints or letters array returns a list that contains 1 for 
every
      * alphabetic character and accent, and 0 otherwise. This allows for 
edge-of-word
      * detection.
      */
@@ -129,7 +128,7 @@
     }
 
     /**
-     * Get all the existing maps in one query, useful given that the default 
+     * Get all the existing maps in one query, useful given that the default
      * behaviour of failing silently is designed to allow it to be used by
      * templates that don't know if a map exists, so may try far too often.
      */
@@ -166,17 +165,17 @@
         $mappage = $prefix.$name;
 
         // Have we used it on thie page already?
-        if ( isset( $this->mMaps[$mappage] ) ) 
+        if ( isset( $this->mMaps[$mappage] ) )
             return $this->mMaps[$mappage];
 
         // Have we used it recently?
         $cached = $wgMemc->get( self::CACHE_PREFIX . $name );
-        if ( $cached ) 
+        if ( $cached )
             return $this->mMaps[$mappage] = ($cached == "false" ? false : 
$cached);
 
         // Does it exist at all?
         $existing = $this->getExistingMapNames( $prefix );
-        if (! isset( $existing[$mappage] ) ) 
+        if (! isset( $existing[$mappage] ) )
             $map = false;
 
         else
@@ -188,7 +187,7 @@
 
     /**
      * Parse a map input syntax into a map.
-     * 
+     *
      * Input syntax is a set of lines.
      *  All " " are ignored.
      *  Lines starting with # are ignored, remaining lines are split by =>
@@ -233,13 +232,13 @@
             // Or, could just signify that the message was blank
             if ( $first_line == "<$mappage>")
                 return false;
-            else if ( preg_replace( '/<(decompose|sensitive)>/', '', 
$first_line ) != '') 
+            else if ( preg_replace( '/<(decompose|sensitive)>/', '', 
$first_line ) != '')
                 return wfMsg( 'transliterator-error-syntax', $first_line, 
$mappage );
 
             if ( strpos( $first_line, "<decompose>" ) !== FALSE ) {
                 $map['__decompose__'] = true;
                 $decompose = true;
-            } 
+            }
             if ( strpos( $first_line, "<sensitive>" ) !== FALSE ) {
                 $map['__sensitive__'] = true;
             }
@@ -254,11 +253,11 @@
 
             $pair = preg_split( '/\s*=>\s*/', $line );
 
-            if ( count( $pair ) != 2 ) 
+            if ( count( $pair ) != 2 )
                 return wfMsg( "transliterator-error-syntax", $line, $mappage );
 
             $from = $pair[0];
-            $to = html_entity_decode( $pair[1], ENT_QUOTES, 'UTF-8' );
+            $to = Sanitizer::decodeCharReferences( $pair[1], ENT_QUOTES, 
'UTF-8' );
 
             // Convert the ^ and $ selectors into special characters for 
matching
             // Leave single ^ and $'s alone incase someone wants to use them
@@ -275,7 +274,7 @@
             }
 
             // Now we've looked at our syntax we can remove html escaping to 
reveal the true form
-            $from = html_entity_decode( $from, ENT_QUOTES, 'UTF-8' );
+            $from = Sanitizer::decodeCharReferences( $from, ENT_QUOTES, 
'UTF-8' );
             if ( $decompose ) { // Undo the NFCing of MediaWiki
                 $from = UtfNormal::toNFD( $from );
             }
@@ -284,9 +283,9 @@
             if ( isset( $map[$from] ) ) {
 
                 // Or a rule of the same length, i.e. the same rule.
-                if ( is_string( $map[$from] ) && $to != $map[$from] ) 
+                if ( is_string( $map[$from] ) && $to != $map[$from] )
                     return wfMsg("transliterator-error-ambiguous", $line, 
$mappage);
-                
+
             } else if ( strlen( $from ) > 1 ){
 
                 // Bail if the left hand side is too long (has performance 
implications otherwise)
@@ -298,7 +297,7 @@
                 for ( $i = 1; $i < $fromlen; $i++ ) {
                     $substr = substr( $from, 0, $i );
 
-                    if (! isset( $map[$substr] ) ) 
+                    if (! isset( $map[$substr] ) )
                         $map[$substr] = true;
                 }
             } // else we have the default rule
@@ -310,9 +309,9 @@
     }
 
     /**
-     * Transliterate a word by iteratively finding the longest substring from 
+     * Transliterate a word by iteratively finding the longest substring from
      * the start of the untransliterated string that we have a rule for, and
-     * transliterating it. 
+     * transliterating it.
      */
     function transliterate( $word, $map )
     {
@@ -452,7 +451,7 @@
             $format = '$1';
         }
 
-        if ( trim( $answer ) != '') { 
+        if ( trim( $answer ) != '') {
             return str_replace('$1', $answer, $format);
         }
 
@@ -468,12 +467,12 @@
             $output = '<span class="transliterator error"> '.$map.' </span>';
 
         } else { // A Map
-            $trans = UtfNormal::toNFC( $this->transliterate( 
html_entity_decode( $word, ENT_QUOTES, 'UTF-8' ), $map ) );
+            $trans = UtfNormal::toNFC( $this->transliterate( 
Sanitizer::decodeCharReferences( $word ), $map ) );
             $output = str_replace( '$1', $trans, $format );
         }
 
         // Populate the dependency table so that we get re-rendered if the map 
changes.
-        if ( isset( $this->mPages[$mappage] ) ) 
+        if ( isset( $this->mPages[$mappage] ) )
             $title = Title::newFromRow( $this->mPages[$mappage] );
         else
             $title = Title::newFromText( $mappage, NS_MEDIAWIKI );
@@ -497,6 +496,7 @@
     static function purgeNewTitle ( &$title, &$newtitle, $a=false, $b=false, 
$c=false ) {
         return self::purgeTitle( $newtitle );
     }
+
     /**
      * Called on ArticleUndelete (and by other purge hook handlers)
      */



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [56951] trunk/extensions/Transliterator/Transliterator.php

Reply via email to