X-Git-Url: https://scripts.mit.edu/gitweb/autoinstallsdev/mediawiki.git/blobdiff_plain/d7967d5e4460e08b6b258307afbca0596b18a3dd..18a6620945d02687fbcfc4c27355d952fd748b41:/languages/classes/LanguageZh.php diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php index bcdf7dd8..0055a33b 100644 --- a/languages/classes/LanguageZh.php +++ b/languages/classes/LanguageZh.php @@ -1,102 +1,156 @@ mDescCodeSep = ':'; + $this->mDescVarSep = ';'; + parent::__construct( $langobj, $maincode, + $variants, + $variantfallbacks, + $flags, + $manualLevel ); + $names = array( + 'zh' => '原文', + 'zh-hans' => '简体', + 'zh-hant' => '繁體', + 'zh-cn' => '大陆', + 'zh-tw' => '台灣', + 'zh-hk' => '香港', + 'zh-mo' => '澳門', + 'zh-sg' => '新加坡', + 'zh-my' => '大马', + ); + $this->mVariantNames = array_merge( $this->mVariantNames, $names ); + } + function loadDefaultTables() { - require( dirname(__FILE__)."/../../includes/ZhConversion.php" ); + require( dirname( __FILE__ ) . "/../../includes/ZhConversion.php" ); $this->mTables = array( - 'zh-cn' => new ReplacementArray( $zh2CN ), - 'zh-tw' => new ReplacementArray( $zh2TW ), - 'zh-sg' => new ReplacementArray( array_merge($zh2CN, $zh2SG) ), - 'zh-hk' => new ReplacementArray( array_merge($zh2TW, $zh2HK) ), - 'zh' => new ReplacementArray + 'zh-hans' => new ReplacementArray( $zh2Hans ), + 'zh-hant' => new ReplacementArray( $zh2Hant ), + 'zh-cn' => new ReplacementArray( array_merge( $zh2Hans, $zh2CN ) ), + 'zh-hk' => new ReplacementArray( array_merge( $zh2Hant, $zh2HK ) ), + 'zh-mo' => new ReplacementArray( array_merge( $zh2Hant, $zh2HK ) ), + 'zh-my' => new ReplacementArray( array_merge( $zh2Hans, $zh2SG ) ), + 'zh-sg' => new ReplacementArray( array_merge( $zh2Hans, $zh2SG ) ), + 'zh-tw' => new ReplacementArray( array_merge( $zh2Hant, $zh2TW ) ), + 'zh' => new ReplacementArray ); } function postLoadTables() { - $this->mTables['zh-sg']->merge( $this->mTables['zh-cn'] ); - $this->mTables['zh-hk']->merge( $this->mTables['zh-tw'] ); - } + $this->mTables['zh-cn']->merge( $this->mTables['zh-hans'] ); + $this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] ); + $this->mTables['zh-mo']->merge( $this->mTables['zh-hant'] ); + $this->mTables['zh-my']->merge( $this->mTables['zh-hans'] ); + $this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] ); + $this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] ); + } /* there shouldn't be any latin text in Chinese conversion, so no need to mark anything. $noParse is there for compatibility with LanguageConvert::markNoConversion - */ - function markNoConversion($text, $noParse = false) { + */ + function markNoConversion( $text, $noParse = false ) { return $text; } function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'zh-cn' ); + return $this->autoConvert( $key, 'zh' ); } } - -/* class that handles both Traditional and Simplified Chinese - right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk. -*/ -class LanguageZh extends LanguageZh_cn { +/** + * class that handles both Traditional and Simplified Chinese + * right now it only distinguish zh_hans, zh_hant, zh_cn, zh_tw, zh_sg and zh_hk. + * + * @ingroup Language + */ +class LanguageZh extends LanguageZh_hans { function __construct() { global $wgHooks; parent::__construct(); - $this->mConverter = new ZhConverter($this, 'zh', - array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'), - array('zh'=>'zh-cn', - 'zh-cn'=>'zh-sg', - 'zh-sg'=>'zh-cn', - 'zh-tw'=>'zh-hk', - 'zh-hk'=>'zh-tw')); + + $variants = array( 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw' ); + + $variantfallbacks = array( + 'zh' => array( 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ), + 'zh-hans' => array( 'zh-cn', 'zh-sg', 'zh-my' ), + 'zh-hant' => array( 'zh-tw', 'zh-hk', 'zh-mo' ), + 'zh-cn' => array( 'zh-hans', 'zh-sg', 'zh-my' ), + 'zh-sg' => array( 'zh-hans', 'zh-cn', 'zh-my' ), + 'zh-my' => array( 'zh-hans', 'zh-sg', 'zh-cn' ), + 'zh-tw' => array( 'zh-hant', 'zh-hk', 'zh-mo' ), + 'zh-hk' => array( 'zh-hant', 'zh-mo', 'zh-tw' ), + 'zh-mo' => array( 'zh-hant', 'zh-hk', 'zh-tw' ), + ); + $ml = array( + 'zh' => 'disable', + 'zh-hans' => 'unidirectional', + 'zh-hant' => 'unidirectional', + ); + + $this->mConverter = new ZhConverter( $this, 'zh', + $variants, $variantfallbacks, + array(), + $ml ); + $wgHooks['ArticleSaveComplete'][] = $this->mConverter; } - # this should give much better diff info function segmentForDiff( $text ) { return preg_replace( "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' ' .\"$1\"", $text); + "' ' .\"$1\"", $text ); } function unsegmentForDiff( $text ) { return preg_replace( "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "\"$1\"", $text); + "\"$1\"", $text ); } - // word segmentation - function stripForSearch( $string ) { - $fname="LanguageZh::stripForSearch"; - wfProfileIn( $fname ); - - // eventually this should be a word segmentation - // for now just treat each character as a word - $t = preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' ' .\"$1\"", $string); - - //always convert to zh-cn before indexing. it should be - //better to use zh-cn for search, since conversion from - //Traditional to Simplified is less ambiguous than the - //other way around - - $t = $this->mConverter->autoConvert($t, 'zh-cn'); - $t = parent::stripForSearch( $t ); - wfProfileOut( $fname ); - return $t; + /** + * auto convert to zh-hans and normalize special characters. + * + * @param $string String + * @param $autoVariant String, default to 'zh-hans' + * @return String + */ + function normalizeForSearch( $string, $autoVariant = 'zh-hans' ) { + wfProfileIn( __METHOD__ ); + + // always convert to zh-hans before indexing. it should be + // better to use zh-hans for search, since conversion from + // Traditional to Simplified is less ambiguous than the + // other way around + $s = $this->mConverter->autoConvert( $string, $autoVariant ); + // LanguageZh_hans::normalizeForSearch + $s = parent::normalizeForSearch( $s ); + wfProfileOut( __METHOD__ ); + return $s; } function convertForSearchResult( $termsArray ) { $terms = implode( '|', $termsArray ); + $terms = self::convertDoubleWidth( $terms ); $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); - $ret = array_unique( explode('|', $terms) ); + $ret = array_unique( explode( '|', $terms ) ); return $ret; } - }