]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - languages/LanguageConverter.php
MediaWiki 1.14.0-scripts
[autoinstalls/mediawiki.git] / languages / LanguageConverter.php
1 <?php
2
3 /**
4  * Contains the LanguageConverter class and ConverterRule class
5  * @ingroup Language
6  *
7  * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
8  * @file
9  */
10
11 /**
12  * Base class for language conversion
13  * @ingroup Language
14  *
15  * @author Zhengzhu Feng <zhengzhu@gmail.com>
16  * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>
17  */
18 class LanguageConverter {
19         var $mPreferredVariant='';
20         var $mMainLanguageCode;
21         var $mVariants, $mVariantFallbacks, $mVariantNames;
22         var $mTablesLoaded = false;
23         var $mTables;
24         var $mTitleDisplay='';
25         var $mDoTitleConvert=true, $mDoContentConvert=true;
26         var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants
27         var $mTitleFromFlag = false;
28         var $mCacheKey;
29         var $mLangObj;
30         var $mMarkup;
31         var $mFlags;
32         var $mDescCodeSep = ':',$mDescVarSep = ';';
33         var $mUcfirst = false;
34
35         const CACHE_VERSION_KEY = 'VERSION 6';
36
37         /**
38          * Constructor
39          *
40          * @param string $maincode the main language code of this language
41          * @param array $variants the supported variants of this language
42          * @param array $variantfallback the fallback language of each variant
43          * @param array $markup array defining the markup used for manual conversion
44          * @param array $flags array defining the custom strings that maps to the flags
45          * @param array $manualLevel limit for supported variants
46          * @public
47          */
48         function __construct($langobj, $maincode,
49                                                                 $variants=array(),
50                                                                 $variantfallbacks=array(),
51                                                                 $markup=array(),
52                                                                 $flags = array(),
53                                                                 $manualLevel = array() ) {
54                 $this->mLangObj = $langobj;
55                 $this->mMainLanguageCode = $maincode;
56                 $this->mVariants = $variants;
57                 $this->mVariantFallbacks = $variantfallbacks;
58                 global $wgLanguageNames;
59                 $this->mVariantNames = $wgLanguageNames;
60                 $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
61                 $m = array(
62                         'begin'=>'-{', 
63                         'flagsep'=>'|',
64                         'unidsep'=>'=>', //for unidirectional conversion
65                         'codesep'=>':',
66                         'varsep'=>';',
67                         'end'=>'}-'
68                 );
69                 $this->mMarkup = array_merge($m, $markup);
70                 $f = array( 
71                         // 'S' show converted text
72                         // '+' add rules for alltext
73                         // 'E' the gave flags is error
74                         // these flags above are reserved for program
75                         'A'=>'A',       // add rule for convert code (all text convert)
76                         'T'=>'T',       // title convert
77                         'R'=>'R',       // raw content
78                         'D'=>'D',       // convert description (subclass implement)
79                         '-'=>'-',       // remove convert (not implement)
80                         'H'=>'H',       // add rule for convert code (but no display in placed code )
81                         'N'=>'N'        // current variant name
82                 );
83                 $this->mFlags = array_merge($f, $flags);
84                 foreach( $this->mVariants as $v)
85                         $this->mManualLevel[$v]=array_key_exists($v,$manualLevel)
86                                                                 ?$manualLevel[$v]
87                                                                 :'bidirectional';
88         }
89
90         /**
91          * @public
92          */
93         function getVariants() {
94                 return $this->mVariants;
95         }
96
97         /**
98          * in case some variant is not defined in the markup, we need
99          * to have some fallback. for example, in zh, normally people
100          * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
101          * when zh-sg is preferred but not defined, we will pick zh-hans
102          * in this case. right now this is only used by zh.
103          *
104          * @param string $v the language code of the variant
105          * @return string array the code of the fallback language or false if there is no fallback
106          * @public
107          */
108         function getVariantFallbacks($v) {
109                 if( isset( $this->mVariantFallbacks[$v] ) ) {
110                         return $this->mVariantFallbacks[$v];
111                 }
112                 return $this->mMainLanguageCode;
113         }
114
115         /**
116          * get preferred language variants.
117          * @param boolean $fromUser Get it from $wgUser's preferences
118          * @return string the preferred language code
119          * @public
120          */
121         function getPreferredVariant( $fromUser = true ) {
122                 global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;
123
124                 if($this->mPreferredVariant)
125                         return $this->mPreferredVariant;
126
127                 // figure out user lang without constructing wgLang to avoid infinite recursion
128                 if( $fromUser )
129                         $defaultUserLang = $wgUser->getOption( 'language' );
130                 else
131                         $defaultUserLang = $this->mMainLanguageCode;
132                 $userLang = $wgRequest->getVal( 'uselang', $defaultUserLang );
133                 // see if interface language is same as content, if not, prevent conversion
134                 if( ! in_array( $userLang, $this->mVariants ) ){ 
135                         $this->mPreferredVariant = $this->mMainLanguageCode; // no conversion
136                         return $this->mPreferredVariant;
137                 }
138
139                 // see if the preference is set in the request
140                 $req = $wgRequest->getText( 'variant' );
141                 if( in_array( $req, $this->mVariants ) ) {
142                         $this->mPreferredVariant = $req;
143                         return $req;
144                 }
145
146                 // check the syntax /code/ArticleTitle
147                 if($wgVariantArticlePath!=false && isset($_SERVER['SCRIPT_NAME'])){
148                         // Note: SCRIPT_NAME probably won't hold the correct value if PHP is run as CGI
149                         // (it will hold path to php.cgi binary), and might not exist on some very old PHP installations
150                         $scriptBase = basename( $_SERVER['SCRIPT_NAME'] );
151                         if(in_array($scriptBase,$this->mVariants)){
152                                 $this->mPreferredVariant = $scriptBase;
153                                 return $this->mPreferredVariant;
154                         }
155                 }
156
157                 // get language variant preference from logged in users
158                 // Don't call this on stub objects because that causes infinite 
159                 // recursion during initialisation
160                 if( $fromUser && $wgUser->isLoggedIn() )  {
161                         $this->mPreferredVariant = $wgUser->getOption('variant');
162                         return $this->mPreferredVariant;
163                 }
164
165                 // see if default variant is globaly set
166                 if($wgDefaultLanguageVariant != false  &&  in_array( $wgDefaultLanguageVariant, $this->mVariants )){
167                         $this->mPreferredVariant = $wgDefaultLanguageVariant;
168                         return $this->mPreferredVariant;
169                 }
170
171                 # FIXME rewrite code for parsing http header. The current code
172                 # is written specific for detecting zh- variants
173                 if( !$this->mPreferredVariant ) {
174                         // see if some supported language variant is set in the
175                         // http header, but we don't set the mPreferredVariant
176                         // variable in case this is called before the user's
177                         // preference is loaded
178                         $pv=$this->mMainLanguageCode;
179                         if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
180                                 $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
181                                 $zh = strstr($header, $pv.'-');
182                                 if($zh) {
183                                         $ary = split("[,;]",$zh);
184                                         $pv = $ary[0];
185                                 }
186                         }
187                         // don't try to return bad variant
188                         if(in_array( $pv, $this->mVariants ))
189                                 return $pv;
190                 }
191
192                 return $this->mMainLanguageCode;
193
194         }
195         
196         /**
197          * dictionary-based conversion
198          *
199          * @param string $text the text to be converted
200          * @param string $toVariant the target language code
201          * @return string the converted text
202          * @private
203          */
204         function autoConvert($text, $toVariant=false) {
205                 $fname="LanguageConverter::autoConvert";
206
207                 wfProfileIn( $fname );
208
209                 if(!$this->mTablesLoaded)
210                         $this->loadTables();
211
212                 if(!$toVariant)
213                         $toVariant = $this->getPreferredVariant();
214                 if(!in_array($toVariant, $this->mVariants))
215                         return $text;
216
217                 /* we convert everything except:
218                    1. html markups (anything between < and >)
219                    2. html entities
220                    3. place holders created by the parser
221                 */
222                 global $wgParser;
223                 if (isset($wgParser) && $wgParser->UniqPrefix()!=''){
224                         $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
225                 } else
226                         $marker = "";
227
228                 // this one is needed when the text is inside an html markup
229                 $htmlfix = '|<[^>]+$|^[^<>]*>';
230
231                 // disable convert to variants between <code></code> tags
232                 $codefix = '<code>.+?<\/code>|';
233                 // disable convertsion of <script type="text/javascript"> ... </script>
234                 $scriptfix = '<script.*?>.*?<\/script>|';
235                 // disable conversion of <pre xxxx> ... </pre>
236                 $prefix = '<pre.*?>.*?<\/pre>|';
237
238                 $reg = '/'.$codefix . $scriptfix . $prefix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
239
240                 $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
241
242                 $m = array_shift($matches);
243
244                 $ret = $this->translate($m[0], $toVariant);
245                 $mstart = $m[1]+strlen($m[0]);
246                 foreach($matches as $m) {
247                         $ret .= substr($text, $mstart, $m[1]-$mstart);
248                         $ret .= $this->translate($m[0], $toVariant);
249                         $mstart = $m[1] + strlen($m[0]);
250                 }
251                 wfProfileOut( $fname );
252                 return $ret;
253         }
254
255         /**
256          * Translate a string to a variant
257          * Doesn't process markup or do any of that other stuff, for that use convert()
258          *
259          * @param string $text Text to convert
260          * @param string $variant Variant language code
261          * @return string Translated text
262          * @private
263          */
264         function translate( $text, $variant ) {
265                 wfProfileIn( __METHOD__ );
266                 if( !$this->mTablesLoaded )
267                         $this->loadTables();
268                 $text = $this->mTables[$variant]->replace( $text );
269                 wfProfileOut( __METHOD__ );
270                 return $text;
271         }
272
273         /**
274          * convert text to all supported variants
275          *
276          * @param string $text the text to be converted
277          * @return array of string
278          * @public
279          */
280         function autoConvertToAllVariants($text) {
281                 $fname="LanguageConverter::autoConvertToAllVariants";
282                 wfProfileIn( $fname );
283                 if( !$this->mTablesLoaded )
284                         $this->loadTables();
285
286                 $ret = array();
287                 foreach($this->mVariants as $variant) {
288                         $ret[$variant] = $this->translate($text, $variant);
289                 }
290
291                 wfProfileOut( $fname );
292                 return $ret;
293         }
294
295         /**
296          * convert link text to all supported variants
297          *
298          * @param string $text the text to be converted
299          * @return array of string
300          * @public
301          */
302         function convertLinkToAllVariants($text) {
303                 if( !$this->mTablesLoaded )
304                         $this->loadTables();
305
306                 $ret = array();
307                 $tarray = explode($this->mMarkup['begin'], $text);
308                 $tfirst = array_shift($tarray);
309
310                 foreach($this->mVariants as $variant)
311                         $ret[$variant] = $this->translate($tfirst,$variant);
312
313                 foreach($tarray as $txt) {
314                         $marked = explode($this->mMarkup['end'], $txt, 2);
315
316                         foreach($this->mVariants as $variant){
317                                 $ret[$variant] .= $this->mMarkup['begin'].$marked[0].$this->mMarkup['end'];
318                                 if(array_key_exists(1, $marked))
319                                         $ret[$variant] .= $this->translate($marked[1],$variant);
320                         }
321                         
322                 }
323
324                 return $ret;
325         }
326
327
328         /**
329          * apply manual conversion
330          * @private
331          */
332         function applyManualConv($convRule){
333                 // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
334                 $title = $convRule->getTitle();
335                 if($title){
336                         $this->mTitleFromFlag = true;
337                         $this->mTitleDisplay =  $title;
338                 }
339
340                 //apply manual conversion table to global table
341                 $convTable = $convRule->getConvTable();
342                 $action = $convRule->getRulesAction();
343                 foreach($convTable as $v=>$t) {
344                         if( !in_array($v,$this->mVariants) )continue;
345                         if( $action=="add" )
346                                 $this->mTables[$v]->mergeArray($t);
347                         elseif ( $action=="remove" )
348                                 $this->mTables[$v]->removeArray($t);
349                 }
350         }
351
352         /**
353          * Convert text using a parser object for context
354          * @public
355          */
356         function parserConvert( $text, &$parser ) {
357                 global $wgDisableLangConversion;
358                 /* don't do anything if this is the conversion table */
359                 if ( $parser->getTitle()->getNamespace() == NS_MEDIAWIKI &&
360                                  strpos($parser->mTitle->getText(), "Conversiontable") !== false ) 
361                 {
362                         return $text;
363                 }
364
365                 if($wgDisableLangConversion)
366                         return $text;
367
368                 $text = $this->convert( $text );
369                 $parser->mOutput->setTitleText( $this->mTitleDisplay );
370                 return $text;
371         }
372
373         /**
374          *  convert title
375          * @private
376          */
377         function convertTitle($text){
378                 global $wgDisableTitleConversion, $wgUser;
379
380                 // check for global param and __NOTC__ tag
381                 if( $wgDisableTitleConversion || !$this->mDoTitleConvert || $wgUser->getOption('noconvertlink') == 1 ) {
382                         $this->mTitleDisplay = $text;
383                         return $text;
384                 }
385
386                 // use the title from the T flag if any
387                 if($this->mTitleFromFlag){
388                         $this->mTitleFromFlag = false;
389                         return $this->mTitleDisplay;
390                 }
391
392                 global $wgRequest;
393                 $isredir = $wgRequest->getText( 'redirect', 'yes' );
394                 $action = $wgRequest->getText( 'action' );
395                 $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
396                 if ( $isredir == 'no' || $action == 'edit' || $action == 'submit' || $linkconvert == 'no' ) {
397                         return $text;
398                 } else {
399                         $this->mTitleDisplay = $this->convert($text);
400                         return $this->mTitleDisplay;
401                 }
402         }
403
404         /**
405          * convert text to different variants of a language. the automatic
406          * conversion is done in autoConvert(). here we parse the text
407          * marked with -{}-, which specifies special conversions of the
408          * text that can not be accomplished in autoConvert()
409          *
410          * syntax of the markup:
411          * -{code1:text1;code2:text2;...}-  or
412          * -{flags|code1:text1;code2:text2;...}-  or
413          * -{text}- in which case no conversion should take place for text
414          *
415          * @param string $text text to be converted
416          * @param bool $isTitle whether this conversion is for the article title
417          * @return string converted text
418          * @public
419          */
420         function convert( $text , $isTitle=false) {
421
422                 $mw =& MagicWord::get( 'notitleconvert' );
423                 if( $mw->matchAndRemove( $text ) )
424                         $this->mDoTitleConvert = false;
425                 $mw =& MagicWord::get( 'nocontentconvert' );
426                 if( $mw->matchAndRemove( $text ) ) {
427                         $this->mDoContentConvert = false;
428                 }
429
430                 // no conversion if redirecting
431                 $mw =& MagicWord::get( 'redirect' );
432                 if( $mw->matchStart( $text ))
433                         return $text;
434
435                 // for title convertion
436                 if ($isTitle) return $this->convertTitle($text);
437
438                 $plang = $this->getPreferredVariant();
439                 $tarray = StringUtils::explode($this->mMarkup['end'], $text);
440                 $text = '';
441                 $lastDelim = false;
442                 foreach($tarray as $txt) {
443                         $marked = explode($this->mMarkup['begin'], $txt, 2);
444
445                         if( $this->mDoContentConvert )
446                                 $text .= $this->autoConvert($marked[0],$plang);
447                         else
448                                 $text .= $marked[0];
449
450                         if(array_key_exists(1, $marked)){
451                                 // strip the flags from syntax like -{T| ... }-
452                                 $crule = new ConverterRule($marked[1], $this);
453                                 $crule->parse($plang);
454
455                                 $text .= $crule->getDisplay();
456                                 $this->applyManualConv($crule);
457                                 $lastDelim = false;
458                         } else {
459                                 // Reinsert the }- which wasn't part of anything
460                                 $text .= $this->mMarkup['end'];
461                                 $lastDelim = true;
462                         }
463                 }
464                 if ( $lastDelim ) {
465                         // Remove the last delimiter (wasn't real)
466                         $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) );
467                 }
468
469                 return $text;
470         }
471
472         /**
473          * if a language supports multiple variants, it is
474          * possible that non-existing link in one variant
475          * actually exists in another variant. this function
476          * tries to find it. See e.g. LanguageZh.php
477          *
478          * @param string $link the name of the link
479          * @param mixed $nt the title object of the link
480          * @return null the input parameters may be modified upon return
481          * @public
482          */
483         function findVariantLink( &$link, &$nt, $forTemplate = false ) {
484                 global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest, $wgUser;
485                 $isredir = $wgRequest->getText( 'redirect', 'yes' );
486                 $action = $wgRequest->getText( 'action' );
487                 $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
488                 $disableLinkConversion = $wgDisableLangConversion || $wgDisableTitleConversion;
489                 $linkBatch = new LinkBatch();
490
491                 $ns=NS_MAIN;
492
493                 if ( $disableLinkConversion || ( !$forTemplate && ( $isredir == 'no' || $action == 'edit'
494                         || $action == 'submit' || $linkconvert == 'no' || $wgUser->getOption('noconvertlink') == 1 ) ) ) {
495                         return;
496                 }
497
498                 if(is_object($nt))
499                         $ns = $nt->getNamespace();
500
501                 $variants = $this->autoConvertToAllVariants($link);
502                 if($variants == false) //give up
503                         return;
504
505                 $titles = array();
506
507                 foreach( $variants as $v ) {
508                         if($v != $link){
509                                 $varnt = Title::newFromText( $v, $ns );
510                                 if(!is_null($varnt)){
511                                         $linkBatch->addObj($varnt);
512                                         $titles[]=$varnt;
513                                 }
514                         }
515                 }
516
517                 // fetch all variants in single query
518                 $linkBatch->execute();
519
520                 foreach( $titles as $varnt ) {
521                         if( $varnt->getArticleID() > 0 ) {
522                                 $nt = $varnt;
523                                 $link = $v;
524                                 break;
525                         }
526                 }
527         }
528
529     /**
530          * returns language specific hash options
531          *
532          * @public
533          */
534         function getExtraHashOptions() {
535                 $variant = $this->getPreferredVariant();
536                 return '!' . $variant ;
537         }
538
539     /**
540          * get title text as defined in the body of the article text
541          *
542          * @public
543          */
544         function getParsedTitle() {
545                 return $this->mTitleDisplay;
546         }
547
548         /**
549          * a write lock to the cache
550          *
551          * @private
552          */
553         function lockCache() {
554                 global $wgMemc;
555                 $success = false;
556                 for($i=0; $i<30; $i++) {
557                         if($success = $wgMemc->add($this->mCacheKey . "lock", 1, 10))
558                                 break;
559                         sleep(1);
560                 }
561                 return $success;
562         }
563
564         /**
565          * unlock cache
566          *
567          * @private
568          */
569         function unlockCache() {
570                 global $wgMemc;
571                 $wgMemc->delete($this->mCacheKey . "lock");
572         }
573
574
575         /**
576          * Load default conversion tables
577          * This method must be implemented in derived class
578          *
579          * @private
580          */
581         function loadDefaultTables() {
582                 $name = get_class($this);
583                 wfDie("Must implement loadDefaultTables() method in class $name");
584         }
585
586         /**
587          * load conversion tables either from the cache or the disk
588          * @private
589          */
590         function loadTables($fromcache=true) {
591                 global $wgMemc;
592                 if( $this->mTablesLoaded )
593                         return;
594                 wfProfileIn( __METHOD__ );
595                 $this->mTablesLoaded = true;
596                 $this->mTables = false;
597                 if($fromcache) {
598                         wfProfileIn( __METHOD__.'-cache' );
599                         $this->mTables = $wgMemc->get( $this->mCacheKey );
600                         wfProfileOut( __METHOD__.'-cache' );
601                 }
602                 if ( !$this->mTables || !isset( $this->mTables[self::CACHE_VERSION_KEY] ) ) {
603                         wfProfileIn( __METHOD__.'-recache' );
604                         // not in cache, or we need a fresh reload.
605                         // we will first load the default tables
606                         // then update them using things in MediaWiki:Zhconversiontable/*
607                         $this->loadDefaultTables();
608                         foreach($this->mVariants as $var) {
609                                 $cached = $this->parseCachedTable($var);
610                                 $this->mTables[$var]->mergeArray($cached);
611                         }
612
613                         $this->postLoadTables();
614                         $this->mTables[self::CACHE_VERSION_KEY] = true;
615
616                         if($this->lockCache()) {
617                                 $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
618                                 $this->unlockCache();
619                         }
620                         wfProfileOut( __METHOD__.'-recache' );
621                 }
622                 wfProfileOut( __METHOD__ );
623         }
624
625     /**
626          * Hook for post processig after conversion tables are loaded
627          *
628          */
629         function postLoadTables() {}
630
631     /**
632          * Reload the conversion tables
633          *
634          * @private
635          */
636         function reloadTables() {
637                 if($this->mTables)
638                         unset($this->mTables);
639                 $this->mTablesLoaded = false;
640                 $this->loadTables(false);
641         }
642
643
644         /**
645          * parse the conversion table stored in the cache
646          *
647          * the tables should be in blocks of the following form:
648          *              -{
649          *                      word => word ;
650          *                      word => word ;
651          *                      ...
652          *              }-
653          *
654          *      to make the tables more manageable, subpages are allowed
655          *      and will be parsed recursively if $recursive=true
656          *
657          */
658         function parseCachedTable($code, $subpage='', $recursive=true) {
659                 global $wgMessageCache;
660                 static $parsed = array();
661
662                 if(!is_object($wgMessageCache))
663                         return array();
664
665                 $key = 'Conversiontable/'.$code;
666                 if($subpage)
667                         $key .= '/' . $subpage;
668
669                 if(array_key_exists($key, $parsed))
670                         return array();
671
672                 if ( strpos( $code, '/' ) === false ) {
673                         $txt = $wgMessageCache->get( 'Conversiontable', true, $code );
674                 } else {
675                         $title = Title::makeTitleSafe( NS_MEDIAWIKI, "Conversiontable/$code" );
676                         if ( $title && $title->exists() ) {
677                                 $article = new Article( $title );
678                                 $txt = $article->getContents();
679                         } else {
680                                 $txt = '';
681                         }
682                 }
683
684                 // get all subpage links of the form
685                 // [[MediaWiki:conversiontable/zh-xx/...|...]]
686                 $linkhead = $this->mLangObj->getNsText(NS_MEDIAWIKI) . ':Conversiontable';
687                 $subs = explode('[[', $txt);
688                 $sublinks = array();
689                 foreach( $subs as $sub ) {
690                         $link = explode(']]', $sub, 2);
691                         if(count($link) != 2)
692                                 continue;
693                         $b = explode('|', $link[0]);
694                         $b = explode('/', trim($b[0]), 3);
695                         if(count($b)==3)
696                                 $sublink = $b[2];
697                         else
698                                 $sublink = '';
699
700                         if($b[0] == $linkhead && $b[1] == $code) {
701                                 $sublinks[] = $sublink;
702                         }
703                 }
704
705
706                 // parse the mappings in this page
707                 $blocks = explode($this->mMarkup['begin'], $txt);
708                 array_shift($blocks);
709                 $ret = array();
710                 foreach($blocks as $block) {
711                         $mappings = explode($this->mMarkup['end'], $block, 2);
712                         $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
713                         $table = explode( ';', $stripped );
714                         foreach( $table as $t ) {
715                                 $m = explode( '=>', $t );
716                                 if( count( $m ) != 2)
717                                         continue;
718                                 // trim any trailling comments starting with '//'
719                                 $tt = explode('//', $m[1], 2);
720                                 $ret[trim($m[0])] = trim($tt[0]);
721                         }
722                 }
723                 $parsed[$key] = true;
724
725
726                 // recursively parse the subpages
727                 if($recursive) {
728                         foreach($sublinks as $link) {
729                                 $s = $this->parseCachedTable($code, $link, $recursive);
730                                 $ret = array_merge($ret, $s);
731                         }
732                 }
733
734                 if ($this->mUcfirst) {
735                         foreach ($ret as $k => $v) {
736                                 $ret[Language::ucfirst($k)] = Language::ucfirst($v);
737                         }
738                 }
739                 return $ret;
740         }
741
742         /**
743          * Enclose a string with the "no conversion" tag. This is used by
744          * various functions in the Parser
745          *
746          * @param string $text text to be tagged for no conversion
747          * @return string the tagged text
748          * @public
749          */
750         function markNoConversion($text, $noParse=false) {
751                 # don't mark if already marked
752                 if(strpos($text, $this->mMarkup['begin']) ||
753                    strpos($text, $this->mMarkup['end']))
754                         return $text;
755
756                 $ret = $this->mMarkup['begin'] .'R|'. $text . $this->mMarkup['end'];
757                 return $ret;
758         }
759
760         /**
761          * convert the sorting key for category links. this should make different
762          * keys that are variants of each other map to the same key
763          */
764         function convertCategoryKey( $key ) {
765                 return $key;
766         }
767         /**
768          * hook to refresh the cache of conversion tables when
769          * MediaWiki:conversiontable* is updated
770          * @private
771          */
772         function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) {
773                 $titleobj = $article->getTitle();
774                 if($titleobj->getNamespace() == NS_MEDIAWIKI) {
775                         $title = $titleobj->getDBkey();
776                         $t = explode('/', $title, 3);
777                         $c = count($t);
778                         if( $c > 1 && $t[0] == 'Conversiontable' ) {
779                                 if(in_array($t[1], $this->mVariants)) {
780                                         $this->reloadTables();
781                                 }
782                         }
783                 }
784                 return true;
785         }
786
787         /** 
788          * Armour rendered math against conversion
789          * Wrap math into rawoutput -{R| math }- syntax
790          * @public
791          */
792         function armourMath($text){ 
793                 $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
794                 return $ret;
795         }
796 }
797
/**
 * Parser for the manual conversion rules written inside a -{ ... }- tag.
 *
 * Typical use: construct with the tag's inner text and the owning converter,
 * call parse(), then read the results through the get*() accessors.
 *
 * @ingroup Language
 * @author  fdcn <fdcn64@gmail.com>
 */
class ConverterRule {
        var $mText; // original text in -{text}-
        var $mConverter; // LanguageConverter object
        var $mManualCodeError='<strong class="error">code error!</strong>';
        var $mRuleDisplay = '',$mRuleTitle=false;
        var $mRules = '';// string : the text of the rules
        var $mRulesAction = 'none';// 'none', 'add' or 'remove', set by parse()
        var $mFlags = array();
        var $mConvTable = array();// variant => array( from => to )
        var $mBidtable = array();// variant => text (bidirectional rules)
        var $mUnidtable = array();// variant => array( from => to ) (unidirectional rules)

        /**
         * Constructor
         *
         * @param string $text the text between -{ and }-
         * @param object $converter a LanguageConverter object
         * @access public
         */
        function __construct($text,$converter){
                $this->mText = $text;
                $this->mConverter=$converter;
                // start with an empty conversion table for every variant
                foreach($converter->mVariants as $v){
                        $this->mConvTable[$v]=array();
                }
        }

        /**
         * Tell whether a looked-up text counts as "nothing found".
         *
         * Only false, null and the empty string are treated as missing; in
         * particular the text "0" is a real value, which a plain truthiness
         * test would wrongly discard.
         *
         * @param mixed $text value returned by a table lookup
         * @return bool
         * @private
         */
        static function isMissingText($text){
                return $text === false || $text === null || $text === '';
        }

        /**
         * Look up the bidirectional table for the first of the given variants
         * that has an entry.
         *
         * @param mixed $variants a variant code, or an array of variant codes
         *                        tried in order
         * @return mixed the text of the first matching variant, or false when
         *               none matches or $variants is neither string nor array
         * @public
         */
        function getTextInBidtable($variants){
                if(is_string($variants)){ $variants=array($variants); }
                if(!is_array($variants)) return false;
                foreach ($variants as $variant){
                        if(array_key_exists($variant, $this->mBidtable)){
                                return $this->mBidtable[$variant];
                        }
                }
                return false;
        }

        /**
         * Parse flags with syntax -{FLAG| ... }-
         *
         * Splits mText into the flag list and the rule text, normalises the
         * flag combination and stores the results in mFlags and mRules.
         * @private
         */
        function parseFlags(){
                $text = $this->mText;
                if(strlen($text) < 2 ) {
                        // too short to hold a flag and a separator: raw output
                        $this->mFlags = array( 'R' );
                        $this->mRules = $text;
                        return;
                }

                $flags = array();
                $markup = $this->mConverter->mMarkup;
                $validFlags = $this->mConverter->mFlags;

                $tt = explode($markup['flagsep'], $text, 2);
                if(count($tt) == 2) {
                        // keep each recognised flag once, mapped to its canonical name
                        $f = explode($markup['varsep'], $tt[0]);
                        foreach($f as $ff) {
                                $ff = trim($ff);
                                if(array_key_exists($ff, $validFlags) &&
                                                        !in_array($validFlags[$ff], $flags))
                                        $flags[] = $validFlags[$ff];
                        }
                        $rules = $tt[1];
                } else {
                        // no flag separator: the whole text is the rule
                        $rules = $text;
                }

                //check flags
                if( in_array('R',$flags) ){
                        $flags = array('R');// remove other flags
                } elseif ( in_array('N',$flags) ){
                        $flags = array('N');// remove other flags
                } elseif ( in_array('-',$flags) ){
                        $flags = array('-');// remove other flags
                } elseif (count($flags)==1 && $flags[0]=='T'){
                        $flags[]='H';
                } elseif ( in_array('H',$flags) ){
                        // replace A flag, and remove other flags except T and D
                        $temp=array('+','H');
                        if(in_array('T',$flags)) $temp[] = 'T';
                        if(in_array('D',$flags)) $temp[] = 'D';
                        $flags = $temp;
                } else {
                        if ( in_array('A',$flags)) {
                                $flags[]='+';
                                $flags[]='S';
                        }
                        if ( in_array('D',$flags) )
                                $flags=array_diff($flags,array('S'));
                }
                if ( count($flags)==0 )
                        $flags = array('S');
                $this->mRules=$rules;
                $this->mFlags=$flags;
        }

        /**
         * generate conversion table
         *
         * Splits mRules into "variant:text" (bidirectional) and
         * "from=>variant:text" (unidirectional) entries and stores them in
         * mBidtable and mUnidtable. If any accepted entry names an unknown
         * variant, both tables are emptied.
         * @private
         */
        function parseRules() {
                $rules = $this->mRules;
                $bidtable = array();
                $unidtable = array();
                $markup = $this->mConverter->mMarkup;

                $choice = explode($markup['varsep'], $rules );
                foreach($choice as $c) {
                        $v = explode($markup['codesep'], $c);
                        if(count($v) != 2)
                                continue;// syntax error, skip
                        $to=trim($v[1]);
                        $v=trim($v[0]);
                        $u = explode($markup['unidsep'], $v);
                        if(count($u) == 1) {
                                $bidtable[$v] = $to;
                        } else if(count($u) == 2){
                                $from=trim($u[0]);$v=trim($u[1]);
                                if( !array_key_exists($v,$unidtable) || !is_array($unidtable[$v]) )
                                        $unidtable[$v]=array();
                                $unidtable[$v][$from]=$to;
                        }
                        // unknown variant code: treat the whole tag as a syntax error
                        if (!array_key_exists($v,$this->mConverter->mVariantNames)){
                                $bidtable = array();
                                $unidtable = array();
                                break;
                        }
                }
                $this->mBidtable = $bidtable;
                $this->mUnidtable = $unidtable;
        }

        /**
         * Build a textual description of the parsed rules, using the
         * converter's description separators and variant names.
         *
         * @return string
         * @private
         */
        function getRulesDesc(){
                $codesep = $this->mConverter->mDescCodeSep;
                $varsep = $this->mConverter->mDescVarSep;
                $text='';
                foreach($this->mBidtable as $k => $v)
                        $text .= $this->mConverter->mVariantNames[$k]."$codesep$v$varsep";
                foreach($this->mUnidtable as $k => $a)
                        foreach($a as $from=>$to)
                                $text.=$from.'⇒'.$this->mConverter->mVariantNames[$k]."$codesep$to$varsep";
                return $text;
        }

        /**
         * Pick the text to show for a variant from the parsed rules.
         *
         * @param string $variant variant code to display
         * @param bool $doConvert when false the rule text is returned as is
         * @return mixed the chosen text; the raw rule text when there are no
         *               rules or conversion is off; false when nothing matches
         * @private
         */
        function getRuleConvertedStr($variant,$doConvert){
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;

                if( count($bidtable) + count($unidtable) == 0 ){
                        return $this->mRules;
                } elseif ($doConvert){// the text converted
                        // display current variant in bidirectional array
                        $disp = $this->getTextInBidtable($variant);
                        // or display current variant in fallbacks
                        if(self::isMissingText($disp))
                                $disp = $this->getTextInBidtable(
                                                $this->mConverter->getVariantFallbacks($variant));
                        // or display current variant in unidirectional array
                        if(self::isMissingText($disp) && array_key_exists($variant,$unidtable)){
                                $disp = array_values($unidtable[$variant]);
                                $disp = $disp[0];
                        }
                        // or display first text under disable manual convert
                        if(self::isMissingText($disp) && $this->mConverter->mManualLevel[$variant]=='disable') {
                                if(count($bidtable)>0){
                                        $disp = array_values($bidtable);
                                        $disp = $disp[0];
                                } else {
                                        $disp = array_values($unidtable);
                                        $disp = array_values($disp[0]);
                                        $disp = $disp[0];
                                }
                        }
                        return $disp;
                } else {// no convert
                        return $this->mRules;
                }
        }

        /**
         * generate conversion table for all text
         *
         * Fills mConvTable from the bidirectional and unidirectional tables,
         * honouring the converter's per-variant manual level.
         * @private
         */
        function generateConvTable(){
                $bidtable = $this->mBidtable;
                $unidtable = $this->mUnidtable;
                $manLevel = $this->mConverter->mManualLevel;

                $vmarked=array();
                foreach($this->mConverter->mVariants as $v) {
                        /* for bidirectional array
                                fill in the missing variants, if any,
                                with fallbacks */
                        if(!array_key_exists($v, $bidtable)) {
                                $variantFallbacks = $this->mConverter->getVariantFallbacks($v);
                                $vf = $this->getTextInBidtable($variantFallbacks);
                                if(!self::isMissingText($vf)) $bidtable[$v] = $vf;
                        }

                        if(array_key_exists($v,$bidtable)){
                                foreach($vmarked as $vo){
                                        // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                        // or -{H|zh:WordZh;zh-tw:WordTw}- or -{-|zh:WordZh;zh-tw:WordTw}-
                                        // to introduce a custom mapping between
                                        // words WordZh and WordTw in the whole text
                                        if($manLevel[$v]=='bidirectional'){
                                                $this->mConvTable[$v][$bidtable[$vo]]=$bidtable[$v];
                                        }
                                        if($manLevel[$vo]=='bidirectional'){
                                                $this->mConvTable[$vo][$bidtable[$v]]=$bidtable[$vo];
                                        }
                                }
                                $vmarked[]=$v;
                        }
                        /*for unidirectional array
                                fill to convert tables */
                        $allow_unid = $manLevel[$v]=='bidirectional'
                                        || $manLevel[$v]=='unidirectional';
                        if($allow_unid && array_key_exists($v,$unidtable)){
                                $ct=$this->mConvTable[$v];
                                $this->mConvTable[$v] = array_merge($ct,$unidtable[$v]);
                        }
                }
        }

        /**
         * Parse rules and flags
         *
         * Populates the display text, the title text, the rules action and the
         * conversion table for later retrieval through the accessors.
         *
         * @param string $variant variant to render for; a falsy value selects
         *                        the converter's preferred variant
         * @public
         */
        function parse($variant){
                if(!$variant) $variant = $this->mConverter->getPreferredVariant();

                $this->parseFlags();
                $flags = $this->mFlags;

                // R holds raw text and N holds a variant code: neither is a rule
                // list, so leave the text untouched for both.
                if( !in_array('R',$flags) && !in_array('N',$flags) ){
                        //FIXME: may cause trouble here...
                        //strip &nbsp; since it interferes with the parsing, plus,
                        //all spaces should be stripped in this tag anyway.
                        $this->mRules = str_replace('&nbsp;', '', $this->mRules);
                        // decode => HTML entities modified by Sanitizer::removeHTMLtags
                        $this->mRules = str_replace('=&gt;','=>',$this->mRules);

                        $this->parseRules();
                }
                $rules = $this->mRules;

                if(count($this->mBidtable)==0 && count($this->mUnidtable)==0){
                        if(in_array('+',$flags) || in_array('-',$flags))
                                // fill all variants if text in -{A/H/-|text} without rules
                                foreach($this->mConverter->mVariants as $v)
                                        $this->mBidtable[$v] = $rules;
                        elseif (!in_array('N',$flags) && !in_array('T',$flags) )
                                $this->mFlags = $flags = array('R');
                }

                if( in_array('R',$flags) ) {
                        // if we don't do content convert, still strip the -{}- tags
                        $this->mRuleDisplay = $rules;
                } elseif ( in_array('N',$flags) ){
                        // process N flag: output current variant name,
                        // or nothing when the code is not a known variant
                        $code = trim($rules);
                        $names = $this->mConverter->mVariantNames;
                        $this->mRuleDisplay = array_key_exists($code, $names) ? $names[$code] : '';
                } elseif ( in_array('D',$flags) ){
                        // process D flag: output rules description
                        $this->mRuleDisplay = $this->getRulesDesc();
                } elseif ( in_array('H',$flags) || in_array('-',$flags) ) {
                        // process H,- flag or T only: output nothing
                        $this->mRuleDisplay = '';
                } elseif ( in_array('S',$flags) ){
                        $this->mRuleDisplay = $this->getRuleConvertedStr($variant,
                                                        $this->mConverter->mDoContentConvert);
                } else {
                        $this->mRuleDisplay= $this->mManualCodeError;
                }
                // process T flag
                if ( in_array('T',$flags) ) {
                        $this->mRuleTitle = $this->getRuleConvertedStr($variant,
                                                        $this->mConverter->mDoTitleConvert);
                }

                if (in_array('-', $flags))
                        $this->mRulesAction='remove';
                if (in_array('+', $flags))
                        $this->mRulesAction='add';

                $this->generateConvTable();
        }

        /**
         * Not implemented yet: currently returns nothing.
         * @public
         */
        function hasRules(){
                // TODO:
        }

        /**
         * get display text on markup -{...}-
         * @public
         */
        function getDisplay(){
                return $this->mRuleDisplay;
        }
        /**
         * get converted title
         * @public
         */
        function getTitle(){
                return $this->mRuleTitle;
        }

        /**
         * return how deal with conversion rules
         * @public
         */
        function getRulesAction(){
                return $this->mRulesAction;
        }

        /**
         * get conversion table ( bidirectional and unidirectional conversion table )
         * @public
         */
        function getConvTable(){
                return $this->mConvTable;
        }

        /**
         * get conversion rules string
         * @public
         */
        function getRules(){
                return $this->mRules;
        }

        /**
         * get conversion flags
         * @public
         */
        function getFlags(){
                return $this->mFlags;
        }
}