X-Git-Url: https://scripts.mit.edu/gitweb/autoinstallsdev/mediawiki.git/blobdiff_plain/a4b52d2fe555a507c376e78ee624898c55968364..d7967d5e4460e08b6b258307afbca0596b18a3dd:/languages/classes/LanguageZh.php diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php new file mode 100644 index 00000000..bcdf7dd8 --- /dev/null +++ b/languages/classes/LanguageZh.php @@ -0,0 +1,102 @@ +mTables = array( + 'zh-cn' => new ReplacementArray( $zh2CN ), + 'zh-tw' => new ReplacementArray( $zh2TW ), + 'zh-sg' => new ReplacementArray( array_merge($zh2CN, $zh2SG) ), + 'zh-hk' => new ReplacementArray( array_merge($zh2TW, $zh2HK) ), + 'zh' => new ReplacementArray + ); + } + + function postLoadTables() { + $this->mTables['zh-sg']->merge( $this->mTables['zh-cn'] ); + $this->mTables['zh-hk']->merge( $this->mTables['zh-tw'] ); + } + + /* there shouldn't be any latin text in Chinese conversion, so no need + to mark anything. + $noParse is there for compatibility with LanguageConvert::markNoConversion + */ + function markNoConversion($text, $noParse = false) { + return $text; + } + + function convertCategoryKey( $key ) { + return $this->autoConvert( $key, 'zh-cn' ); + } +} + + +/* class that handles both Traditional and Simplified Chinese + right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk. +*/ +class LanguageZh extends LanguageZh_cn { + + function __construct() { + global $wgHooks; + parent::__construct(); + $this->mConverter = new ZhConverter($this, 'zh', + array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'), + array('zh'=>'zh-cn', + 'zh-cn'=>'zh-sg', + 'zh-sg'=>'zh-cn', + 'zh-tw'=>'zh-hk', + 'zh-hk'=>'zh-tw')); + $wgHooks['ArticleSaveComplete'][] = $this->mConverter; + } + + + # this should give much better diff info + function segmentForDiff( $text ) { + return preg_replace( + "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", + "' ' .\"$1\"", $text); + } + + function unsegmentForDiff( $text ) { + return preg_replace( + "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e", + "\"$1\"", $text); + } + + // word segmentation + function stripForSearch( $string ) { + $fname="LanguageZh::stripForSearch"; + wfProfileIn( $fname ); + + // eventually this should be a word segmentation + // for now just treat each character as a word + $t = preg_replace( + "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", + "' ' .\"$1\"", $string); + + //always convert to zh-cn before indexing. it should be + //better to use zh-cn for search, since conversion from + //Traditional to Simplified is less ambiguous than the + //other way around + + $t = $this->mConverter->autoConvert($t, 'zh-cn'); + $t = parent::stripForSearch( $t ); + wfProfileOut( $fname ); + return $t; + + } + + function convertForSearchResult( $termsArray ) { + $terms = implode( '|', $termsArray ); + $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); + $ret = array_unique( explode('|', $terms) ); + return $ret; + } + +} +