http://www.mediawiki.org/wiki/Special:Code/MediaWiki/67607

Revision: 67607
Author:   liangent
Date:     2010-06-08 10:51:26 +0000 (Tue, 08 Jun 2010)

Log Message:
-----------
Make sortkeys more unique and sorted

Modified Paths:
--------------
    trunk/extensions/CategoryMultisortChinese/CategoryMultisortChinese.hooks.php

Modified: 
trunk/extensions/CategoryMultisortChinese/CategoryMultisortChinese.hooks.php
===================================================================
--- 
trunk/extensions/CategoryMultisortChinese/CategoryMultisortChinese.hooks.php    
    2010-06-08 09:23:59 UTC (rev 67606)
+++ 
trunk/extensions/CategoryMultisortChinese/CategoryMultisortChinese.hooks.php    
    2010-06-08 10:51:26 UTC (rev 67607)
@@ -41,37 +41,34 @@
                
                $title = $parser->getTitle();
                $text = $title->getText();
-               $fc = utf8ToCodepoint( $wgContLang->firstChar( $text ) );
                
                $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'mandarin',
-                       array_key_exists( $fc, $data->mandarin ) ? 
$data->mandarin[$fc] : ''
+                       
$this->onCategoryMultisortSortkeys_buildMandarinSortkey( $data, $text )
                );
                $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'stroke',
-                       $this->onCategoryMultisortSortkeys_getStroke( $data, 
$fc )
+                       $this->onCategoryMultisortSortkeys_buildStrokeSortkey( 
$data, $text )
                );
                $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'radical',
-                       $this->onCategoryMultisortSortkeys_getRadical( $data, 
$fc )
+                       $this->onCategoryMultisortSortkeys_buildRadicalSortkey( 
$data, $text )
                );
                
                $conv = $wgContLang->autoConvertToAllVariants( $text );
                
                if ( array_key_exists( 'zh-hans', $conv ) ) {
-                       $fc = utf8ToCodepoint( $wgContLang->firstChar( 
$conv['zh-hans'] ) );
                        $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'stroke-s',
-                               $this->onCategoryMultisortSortkeys_getStroke( 
$data, $fc )
+                               
$this->onCategoryMultisortSortkeys_buildStrokeSortkey( $data, $conv['zh-hans'] )
                        );
                        $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'radical-s',
-                               $this->onCategoryMultisortSortkeys_getRadical( 
$data, $fc )
+                               
$this->onCategoryMultisortSortkeys_buildRadicalSortkey( $data, $conv['zh-hans'] 
)
                        );
                }
                
                if ( array_key_exists( 'zh-hant', $conv ) ) {
-                       $fc = utf8ToCodepoint( $wgContLang->firstChar( 
$conv['zh-hant'] ) );
                        $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'stroke-t',
-                               $this->onCategoryMultisortSortkeys_getStroke( 
$data, $fc )
+                               
$this->onCategoryMultisortSortkeys_buildStrokeSortkey( $data, $conv['zh-hant'] )
                        );
                        $this->onCategoryMultisortSortkeys_setDefaultSortkey( 
$categoryMultisorts, 'radical-t',
-                               $this->onCategoryMultisortSortkeys_getRadical( 
$data, $fc )
+                               
$this->onCategoryMultisortSortkeys_buildRadicalSortkey( $data, $conv['zh-hant'] 
)
                        );
                }
                
@@ -98,4 +95,55 @@
                        return sprintf( '%s%03d', codepointToUtf8( $radicalCp 
), $rest );
                }
        }
+       
+       /**
+        * Split a string into an array of single characters, in their original order.
+        *
+        * Each element is whatever $wgContLang->firstChar() returns for the
+        * remaining text; that many bytes are then removed from the front.
+        *
+        * @param $str String: the text to split
+        * @return Array of character strings; empty array for an empty input
+        */
+       function onCategoryMultisortSortkeys_splitString( $str ) {
+           global $wgContLang;
+           
+           $str = (string)$str;
+           $result = array();
+           # Compare against '' instead of relying on truthiness: the string "0"
+           # is falsy in PHP, so a bare while ( $str ) ended the loop as soon as
+           # the remainder was exactly "0", dropping that last character.
+           while ( $str !== '' ) {
+               $fc = (string)$wgContLang->firstChar( $str );
+               if ( $fc === '' ) {
+                   # Nothing was consumed, so the remainder would never shrink;
+                   # stop here instead of looping forever.
+                   break;
+               }
+               $result[] = $fc;
+               # substr() can return false rather than '' once the offset reaches
+               # the end of the string (depends on PHP version), hence the cast.
+               $str = (string)substr( $str, strlen( $fc ) );
+           }
+           return $result;
+       }
+       
+       /**
+        * Build the 'mandarin' sortkey for a string.
+        *
+        * For every character of $str, the sortkey gets the character's entry
+        * from $data->mandarin (empty if there is none) padded with NUL bytes to
+        * 7 bytes, followed by the character itself padded with NUL bytes to
+        * 4 bytes.
+        *
+        * @param $data Object whose ->mandarin array is keyed by codepoint
+        * @param $str String: the text to build a sortkey for
+        * @return String: the concatenated per-character segments
+        */
+       function onCategoryMultisortSortkeys_buildMandarinSortkey( $data, $str ) {
+           $segments = array();
+           foreach ( $this->onCategoryMultisortSortkeys_splitString( $str ) as $char ) {
+               $codepoint = utf8ToCodepoint( $char );
+               if ( array_key_exists( $codepoint, $data->mandarin ) ) {
+                   $reading = $data->mandarin[$codepoint];
+               } else {
+                   $reading = '';
+               }
+               # Fixed widths: 7 bytes for the Mandarin entry, 4 for the raw character.
+               $segments[] = str_pad( $reading, 7, "\0" ) . str_pad( $char, 4, "\0" );
+           }
+           return implode( '', $segments );
+       }
+       
+       /**
+        * Build a stroke sortkey for a string.
+        *
+        * For every character of $str, the sortkey gets the result of
+        * onCategoryMultisortSortkeys_getStroke() for that character's codepoint,
+        * followed by the character itself padded with NUL bytes to 4 bytes.
+        *
+        * @param $data Object passed through to onCategoryMultisortSortkeys_getStroke()
+        * @param $str String: the text to build a sortkey for
+        * @return String: the concatenated per-character segments
+        */
+       function onCategoryMultisortSortkeys_buildStrokeSortkey( $data, $str ) {
+           $segments = array();
+           $chars = $this->onCategoryMultisortSortkeys_splitString( $str );
+           foreach ( $chars as $char ) {
+               $paddedChar = str_pad( $char, 4, "\0" );
+               $stroke = $this->onCategoryMultisortSortkeys_getStroke( $data, utf8ToCodepoint( $char ) );
+               $segments[] = $stroke . $paddedChar;
+           }
+           return implode( '', $segments );
+       }
+       
+       /**
+        * Build a radical-stroke sortkey for a string.
+        *
+        * For every character of $str, the sortkey gets the result of
+        * onCategoryMultisortSortkeys_getRadical() for that character's codepoint,
+        * followed by the character itself padded with NUL bytes to 4 bytes.
+        *
+        * @param $data Object passed through to onCategoryMultisortSortkeys_getRadical()
+        * @param $str String: the text to build a sortkey for
+        * @return String: the concatenated per-character segments
+        */
+       function onCategoryMultisortSortkeys_buildRadicalSortkey( $data, $str ) {
+           $segments = array();
+           $chars = $this->onCategoryMultisortSortkeys_splitString( $str );
+           foreach ( $chars as $char ) {
+               $paddedChar = str_pad( $char, 4, "\0" );
+               $radical = $this->onCategoryMultisortSortkeys_getRadical( $data, utf8ToCodepoint( $char ) );
+               $segments[] = $radical . $paddedChar;
+           }
+           return implode( '', $segments );
+       }
 }



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to