scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blobdiff - languages/classes/LanguageZh.php
MediaWiki 1.17.0
[autoinstallsdev/mediawiki.git] / languages / classes / LanguageZh.php
index bcdf7dd8a8d044a5f872ce69080625efc3ab4b34..0055a33b064a968b052064970701aa0e7e743c12 100644 (file)
 <?php
-/**
-  * @addtogroup Language
-  */
-require_once( dirname(__FILE__).'/../LanguageConverter.php' );
-require_once( dirname(__FILE__).'/LanguageZh_cn.php' );
 
+require_once( dirname( __FILE__ ) . '/../LanguageConverter.php' );
+require_once( dirname( __FILE__ ) . '/LanguageZh_hans.php' );
+
+/**
+ * @ingroup Language
+ */
 class ZhConverter extends LanguageConverter {
+
+       function __construct( $langobj, $maincode,
+                                                               $variants = array(),
+                                                               $variantfallbacks = array(),
+                                                               $flags = array(),
+                                                               $manualLevel = array() ) {
+               $this->mDescCodeSep = ':';
+               $this->mDescVarSep = ';';
+               parent::__construct( $langobj, $maincode,
+                                                                       $variants,
+                                                                       $variantfallbacks,
+                                                                       $flags,
+                                                                       $manualLevel );
+               $names = array(
+                       'zh'      => '原文',
+                       'zh-hans' => '简体',
+                       'zh-hant' => '繁體',
+                       'zh-cn'   => '大陆',
+                       'zh-tw'   => '台灣',
+                       'zh-hk'   => '香港',
+                       'zh-mo'   => '澳門',
+                       'zh-sg'   => '新加坡',
+                       'zh-my'   => '大马',
+               );
+               $this->mVariantNames = array_merge( $this->mVariantNames, $names );
+       }
+
        function loadDefaultTables() {
-               require( dirname(__FILE__)."/../../includes/ZhConversion.php" );
+               require( dirname( __FILE__ ) . "/../../includes/ZhConversion.php" );
                $this->mTables = array(
-                       'zh-cn' => new ReplacementArray( $zh2CN ),
-                       'zh-tw' => new ReplacementArray( $zh2TW ),
-                       'zh-sg' => new ReplacementArray( array_merge($zh2CN, $zh2SG) ),
-                       'zh-hk' => new ReplacementArray( array_merge($zh2TW, $zh2HK) ),
-                       'zh' => new ReplacementArray
+                       'zh-hans' => new ReplacementArray( $zh2Hans ),
+                       'zh-hant' => new ReplacementArray( $zh2Hant ),
+                       'zh-cn'   => new ReplacementArray( array_merge( $zh2Hans, $zh2CN ) ),
+                       'zh-hk'   => new ReplacementArray( array_merge( $zh2Hant, $zh2HK ) ),
+                       'zh-mo'   => new ReplacementArray( array_merge( $zh2Hant, $zh2HK ) ),
+                       'zh-my'   => new ReplacementArray( array_merge( $zh2Hans, $zh2SG ) ),
+                       'zh-sg'   => new ReplacementArray( array_merge( $zh2Hans, $zh2SG ) ),
+                       'zh-tw'   => new ReplacementArray( array_merge( $zh2Hant, $zh2TW ) ),
+                       'zh'      => new ReplacementArray
                );
        }
 
        function postLoadTables() {
-               $this->mTables['zh-sg']->merge( $this->mTables['zh-cn'] );
-               $this->mTables['zh-hk']->merge( $this->mTables['zh-tw'] );
-    }
+               $this->mTables['zh-cn']->merge( $this->mTables['zh-hans'] );
+               $this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] );
+               $this->mTables['zh-mo']->merge( $this->mTables['zh-hant'] );
+               $this->mTables['zh-my']->merge( $this->mTables['zh-hans'] );
+               $this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] );
+               $this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] );
+       }
 
        /* there shouldn't be any Latin text in Chinese conversion, so no need
           to mark anything.
           $noParse is there for compatibility with LanguageConverter::markNoConversion
-    */
-       function markNoConversion($text, $noParse = false) {
+        */
+       function markNoConversion( $text, $noParse = false ) {
                return $text;
        }
 
        function convertCategoryKey( $key ) {
-               return $this->autoConvert( $key, 'zh-cn' );
+               return $this->autoConvert( $key, 'zh' );
        }
 }
 
-
-/* class that handles both Traditional and Simplified Chinese
-   right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk.
-*/
-class LanguageZh extends LanguageZh_cn {
+/**
+ * Class that handles both Traditional and Simplified Chinese.
+ * Right now it only distinguishes zh_hans, zh_hant, zh_cn, zh_hk, zh_mo, zh_my, zh_sg and zh_tw.
+ *
+ * @ingroup Language
+ */
+class LanguageZh extends LanguageZh_hans {
 
        function __construct() {
                global $wgHooks;
                parent::__construct();
-               $this->mConverter = new ZhConverter($this, 'zh',
-                                            array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'),
-                                                                                       array('zh'=>'zh-cn',
-                                                                                                 'zh-cn'=>'zh-sg',
-                                                                                                 'zh-sg'=>'zh-cn',
-                                                                                                 'zh-tw'=>'zh-hk',
-                                                                                                 'zh-hk'=>'zh-tw'));
+
+               $variants = array( 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw' );
+
+               $variantfallbacks = array(
+                       'zh'      => array( 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ),
+                       'zh-hans' => array( 'zh-cn', 'zh-sg', 'zh-my' ),
+                       'zh-hant' => array( 'zh-tw', 'zh-hk', 'zh-mo' ),
+                       'zh-cn'   => array( 'zh-hans', 'zh-sg', 'zh-my' ),
+                       'zh-sg'   => array( 'zh-hans', 'zh-cn', 'zh-my' ),
+                       'zh-my'   => array( 'zh-hans', 'zh-sg', 'zh-cn' ),
+                       'zh-tw'   => array( 'zh-hant', 'zh-hk', 'zh-mo' ),
+                       'zh-hk'   => array( 'zh-hant', 'zh-mo', 'zh-tw' ),
+                       'zh-mo'   => array( 'zh-hant', 'zh-hk', 'zh-tw' ),
+               );
+               $ml = array(
+                       'zh'      => 'disable',
+                       'zh-hans' => 'unidirectional',
+                       'zh-hant' => 'unidirectional',
+               );
+
+               $this->mConverter = new ZhConverter( $this, 'zh',
+                                                               $variants, $variantfallbacks,
+                                                               array(),
+                                                               $ml );
+
                $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
        }
 
-
        # this should give much better diff info
        function segmentForDiff( $text ) {
                return preg_replace(
                        "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                       "' ' .\"$1\"", $text);
+                       "' ' .\"$1\"", $text );
        }
 
        function unsegmentForDiff( $text ) {
                return preg_replace(
                        "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                       "\"$1\"", $text);
+                       "\"$1\"", $text );
        }
 
-       // word segmentation
-       function stripForSearch( $string ) {
-               $fname="LanguageZh::stripForSearch";
-               wfProfileIn( $fname );
-
-               // eventually this should be a word segmentation
-               // for now just treat each character as a word
-               $t = preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                               "' ' .\"$1\"", $string);
-
-        //always convert to zh-cn before indexing. it should be
-               //better to use zh-cn for search, since conversion from
-               //Traditional to Simplified is less ambiguous than the
-               //other way around
-
-               $t = $this->mConverter->autoConvert($t, 'zh-cn');
-               $t = parent::stripForSearch( $t );
-               wfProfileOut( $fname );
-               return $t;
+       /**
+        * Auto-convert to the given variant (zh-hans by default) and normalize special characters.
+        *
+        * @param $string String
+        * @param $autoVariant String, defaults to 'zh-hans'
+        * @return String
+        */
+       function normalizeForSearch( $string, $autoVariant = 'zh-hans' ) {
+               wfProfileIn( __METHOD__ );
+
+               // Convert to $autoVariant (zh-hans by default) before indexing.
+               // It should be better to use zh-hans for search, since conversion
+               // from Traditional to Simplified is less ambiguous than the
+               // other way around.
+               $s = $this->mConverter->autoConvert( $string, $autoVariant );
+               // LanguageZh_hans::normalizeForSearch
+               $s = parent::normalizeForSearch( $s );
+               wfProfileOut( __METHOD__ );
+               return $s;
 
        }
 
        function convertForSearchResult( $termsArray ) {
                $terms = implode( '|', $termsArray );
+               $terms = self::convertDoubleWidth( $terms );
                $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
-               $ret = array_unique( explode('|', $terms) );
+               $ret = array_unique( explode( '|', $terms ) );
                return $ret;
        }
-
 }