scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blobdiff - languages/classes/LanguageZh.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / languages / classes / LanguageZh.php
index 093626909b2d12e834b5b73e817f4f30c2de94cc..e1099f8867f64ac6b3340a7870d4765346d8ea35 100644 (file)
 <?php
-
-require_once( dirname(__FILE__).'/../LanguageConverter.php' );
-require_once( dirname(__FILE__).'/LanguageZh_hans.php' );
+/**
+ * Chinese-specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
 
 /**
  * @ingroup Language
  */
 class ZhConverter extends LanguageConverter {
-
-       function __construct($langobj, $maincode,
-                                                               $variants=array(),
-                                                               $variantfallbacks=array(),
-                                                               $markup=array(),
-                                                               $flags = array(),
-                                                               $manualLevel = array() ) {
+       /**
+        * @param Language $langobj
+        * @param string $maincode
+        * @param array $variants
+        * @param array $variantfallbacks
+        * @param array $flags
+        * @param array $manualLevel
+        */
+       function __construct( $langobj, $maincode,
+                                                               $variants = [],
+                                                               $variantfallbacks = [],
+                                                               $flags = [],
+                                                               $manualLevel = [] ) {
                $this->mDescCodeSep = ':';
                $this->mDescVarSep = ';';
-               parent::__construct($langobj, $maincode,
+               parent::__construct( $langobj, $maincode,
                                                                        $variants,
                                                                        $variantfallbacks,
-                                                                       $markup,
                                                                        $flags,
-                                                                       $manualLevel);
-               $names = array(
-                       'zh'      => '原文',
+                                                                       $manualLevel );
+               $names = [
+                       'zh' => '原文',
                        'zh-hans' => '简体',
                        'zh-hant' => '繁體',
-                       'zh-cn'   => '大陆',
-                       'zh-tw'   => '台灣',
-                       'zh-hk'   => '香港',
-                       'zh-mo'   => '澳門',
-                       'zh-sg'   => '新加坡',
-                       'zh-my'   => '大马',
-               );
-               $this->mVariantNames = array_merge($this->mVariantNames,$names);
+                       'zh-cn' => '大陆',
+                       'zh-tw' => '台灣',
+                       'zh-hk' => '香港',
+                       'zh-mo' => '澳門',
+                       'zh-sg' => '新加坡',
+                       'zh-my' => '大马',
+               ];
+               $this->mVariantNames = array_merge( $this->mVariantNames, $names );
        }
 
        function loadDefaultTables() {
-               require( dirname(__FILE__)."/../../includes/ZhConversion.php" );
-               $this->mTables = array(
-                       'zh-hans' => new ReplacementArray( $zh2Hans ),
-                       'zh-hant' => new ReplacementArray( $zh2Hant ),
-                       'zh-cn'   => new ReplacementArray( array_merge($zh2Hans, $zh2CN) ),
-                       'zh-hk'   => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
-                       'zh-mo'   => new ReplacementArray( array_merge($zh2Hant, $zh2HK) ),
-                       'zh-my'   => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
-                       'zh-sg'   => new ReplacementArray( array_merge($zh2Hans, $zh2SG) ),
-                       'zh-tw'   => new ReplacementArray( array_merge($zh2Hant, $zh2TW) ),
-                       'zh'      => new ReplacementArray
-               );
+               $this->mTables = [
+                       'zh-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
+                       'zh-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
+                       'zh-cn' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
+                       'zh-hk' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
+                       'zh-mo' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
+                       'zh-my' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
+                       'zh-sg' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
+                       'zh-tw' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW ),
+                       'zh' => new ReplacementArray
+               ];
        }
 
        function postLoadTables() {
-               $this->mTables['zh-cn']->merge( $this->mTables['zh-hans'] );
-               $this->mTables['zh-hk']->merge( $this->mTables['zh-hant'] );
-               $this->mTables['zh-mo']->merge( $this->mTables['zh-hant'] );
-               $this->mTables['zh-my']->merge( $this->mTables['zh-hans'] );
-               $this->mTables['zh-sg']->merge( $this->mTables['zh-hans'] );
-               $this->mTables['zh-tw']->merge( $this->mTables['zh-hant'] );
+               $this->mTables['zh-cn']->setArray(
+                       $this->mTables['zh-cn']->getArray() + $this->mTables['zh-hans']->getArray()
+               );
+               $this->mTables['zh-hk']->setArray(
+                       $this->mTables['zh-hk']->getArray() + $this->mTables['zh-hant']->getArray()
+               );
+               $this->mTables['zh-mo']->setArray(
+                       $this->mTables['zh-mo']->getArray() + $this->mTables['zh-hant']->getArray()
+               );
+               $this->mTables['zh-my']->setArray(
+                       $this->mTables['zh-my']->getArray() + $this->mTables['zh-hans']->getArray()
+               );
+               $this->mTables['zh-sg']->setArray(
+                       $this->mTables['zh-sg']->getArray() + $this->mTables['zh-hans']->getArray()
+               );
+               $this->mTables['zh-tw']->setArray(
+                       $this->mTables['zh-tw']->getArray() + $this->mTables['zh-hant']->getArray()
+               );
        }
 
-       /* there shouldn't be any latin text in Chinese conversion, so no need
-          to mark anything.
-          $noParse is there for compatibility with LanguageConvert::markNoConversion
+       /**
+        * @param string $key
+        * @return string
         */
-       function markNoConversion($text, $noParse = false) {
-               return $text;
-       }
-
        function convertCategoryKey( $key ) {
                return $this->autoConvert( $key, 'zh' );
        }
@@ -80,78 +110,89 @@ class ZhConverter extends LanguageConverter {
  * @ingroup Language
  */
 class LanguageZh extends LanguageZh_hans {
-
        function __construct() {
-               global $wgHooks;
                parent::__construct();
 
-               $variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw');
-               $variantfallbacks = array(
-                       'zh'      => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'),
-                       'zh-hans' => array('zh-cn','zh-sg','zh-my'),
-                       'zh-hant' => array('zh-tw','zh-hk','zh-mo'),
-                       'zh-cn'   => array('zh-hans','zh-sg','zh-my'),
-                       'zh-sg'   => array('zh-hans','zh-cn','zh-my'),
-                       'zh-my'   => array('zh-hans','zh-sg','zh-cn'),
-                       'zh-tw'   => array('zh-hant','zh-hk','zh-mo'),
-                       'zh-hk'   => array('zh-hant','zh-mo','zh-tw'),
-                       'zh-mo'   => array('zh-hant','zh-hk','zh-tw'),
-               );
-               $ml=array(
-                       'zh'      => 'disable',
+               $variants = [
+                       'zh',
+                       'zh-hans',
+                       'zh-hant',
+                       'zh-cn',
+                       'zh-hk',
+                       'zh-mo',
+                       'zh-my',
+                       'zh-sg',
+                       'zh-tw'
+               ];
+
+               $variantfallbacks = [
+                       'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
+                       'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
+                       'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
+                       'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
+                       'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
+                       'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
+                       'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
+                       'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
+                       'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
+               ];
+               $ml = [
+                       'zh' => 'disable',
                        'zh-hans' => 'unidirectional',
                        'zh-hant' => 'unidirectional',
-               );
+               ];
 
                $this->mConverter = new ZhConverter( $this, 'zh',
                                                                $variants, $variantfallbacks,
-                                                               array(),array(),
-                                                               $ml);
-
-               $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
+                                                               [],
+                                                               $ml );
        }
 
-       # this should give much better diff info
+       /**
+        * Insert a space before each multi-byte character; this should give much better diff info.
+        *
+        * @param string $text
+        * @return string
+        */
        function segmentForDiff( $text ) {
-               return preg_replace(
-                       "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                       "' ' .\"$1\"", $text);
+               return preg_replace( '/[\xc0-\xff][\x80-\xbf]*/', ' $0', $text );
        }
 
+       /**
+        * @param string $text
+        * @return string
+        */
        function unsegmentForDiff( $text ) {
-               return preg_replace(
-                       "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                       "\"$1\"", $text);
+               return preg_replace( '/ ([\xc0-\xff][\x80-\xbf]*)/', '$1', $text );
        }
 
-       // word segmentation
-       function stripForSearch( $string ) {
-               wfProfileIn( __METHOD__ );
-
-               // eventually this should be a word segmentation
-               // for now just treat each character as a word
-               // @fixme only do this for Han characters...
-               $t = preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
-                               " $1", $string);
-
-        //always convert to zh-hans before indexing. it should be
-               //better to use zh-hans for search, since conversion from
-               //Traditional to Simplified is less ambiguous than the
-               //other way around
-
-               $t = $this->mConverter->autoConvert($t, 'zh-hans');
-               $t = parent::stripForSearch( $t );
-               wfProfileOut( __METHOD__ );
-               return $t;
-
+       /**
+        * Automatically convert to $autoVariant (zh-hans by default) and normalize special characters.
+        *
+        * @param string $string
+        * @param string $autoVariant Defaults to 'zh-hans'
+        * @return string
+        */
+       function normalizeForSearch( $string, $autoVariant = 'zh-hans' ) {
+               // Convert to $autoVariant (zh-hans by default) before indexing. It should be
+               // better to use zh-hans for search, since conversion from
+               // Traditional to Simplified is less ambiguous than the
+               // other way around.
+               $s = $this->mConverter->autoConvert( $string, $autoVariant );
+               // LanguageZh_hans::normalizeForSearch
+               $s = parent::normalizeForSearch( $s );
+               return $s;
        }
 
+       /**
+        * @param array $termsArray
+        * @return array
+        */
        function convertForSearchResult( $termsArray ) {
                $terms = implode( '|', $termsArray );
+               $terms = self::convertDoubleWidth( $terms );
                $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
-               $ret = array_unique( explode('|', $terms) );
+               $ret = array_unique( explode( '|', $terms ) );
                return $ret;
        }
 }
-