]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - languages/LanguageConverter.php
MediaWiki 1.5.8 (initial commit)
[autoinstallsdev/mediawiki.git] / languages / LanguageConverter.php
1 <?php
2 /**
3   * @package MediaWiki
4   * @subpackage Language
5   *
6   * @author Zhengzhu Feng <zhengzhu@gmail.com>
7   * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
8   */
9
10 class LanguageConverter {
11         var $mPreferredVariant='';
12         var $mMainLanguageCode;
13         var $mVariants, $mVariantFallbacks;
14         var $mTablesLoaded = false;
15         var $mTables;
16         var $mTitleDisplay='';
17         var $mDoTitleConvert=true, $mDoContentConvert=true;
18         var $mCacheKey;
19         var $mLangObj;
20         var $mMarkup;
21         var $mFlags;
22         var $mUcfirst = false;
23         /**
24      * Constructor
25          *
26      * @param string $maincode the main language code of this language
27      * @param array $variants the supported variants of this language
28      * @param array $variantfallback the fallback language of each variant
29      * @param array $markup array defining the markup used for manual conversion
30          * @param array $flags array defining the custom strings that maps to the flags
31      * @access public
32      */
33         function LanguageConverter($langobj, $maincode,
34                                                                 $variants=array(),
35                                                                 $variantfallbacks=array(),
36                                                                 $markup=array(),
37                                                                 $flags = array()) {
38                 global $wgDBname;
39                 $this->mLangObj = $langobj;
40                 $this->mMainLanguageCode = $maincode;
41                 $this->mVariants = $variants;
42                 $this->mVariantFallbacks = $variantfallbacks;
43                 $this->mCacheKey = $wgDBname . ":conversiontables";
44                 $m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':',
45                                    'varsep'=>';', 'end'=>'}-');
46                 $this->mMarkup = array_merge($m, $markup);
47                 $f = array('A'=>'A', 'T'=>'T');
48                 $this->mFlags = array_merge($f, $flags);
49         }
50
51         /**
52      * @access public
53      */
54         function getVariants() {
55                 return $this->mVariants;
56         }
57
58         /**
59          * in case some variant is not defined in the markup, we need
60          * to have some fallback. for example, in zh, normally people
61          * will define zh-cn and zh-tw, but less so for zh-sg or zh-hk.
62          * when zh-sg is preferred but not defined, we will pick zh-cn
63          * in this case. right now this is only used by zh.
64          *
65          * @param string $v the language code of the variant
66          * @return string the code of the fallback language or false if there is no fallback
67      * @access private
68         */
69         function getVariantFallback($v) {
70                 return $this->mVariantFallbacks[$v];
71         }
72
73
74         /**
75      * get preferred language variants.
76      * @return string the preferred language code
77      * @access public
78         */
79         function getPreferredVariant() {
80                 global $wgUser, $wgRequest;
81
82                 if($this->mPreferredVariant)
83                         return $this->mPreferredVariant;
84
85                 // see if the preference is set in the request
86                 $req = $wgRequest->getText( 'variant' );
87                 if( in_array( $req, $this->mVariants ) ) {
88                         $this->mPreferredVariant = $req;
89                         return $req;
90                 }
91
92                 // get language variant preference from logged in users
93                 if(is_object($wgUser) && $wgUser->isLoggedIn() )  {
94                         $this->mPreferredVariant = $wgUser->getOption('variant');
95                         return $this->mPreferredVariant;
96                 }
97
98                 # FIXME rewrite code for parsing http header. The current code
99                 # is written specific for detecting zh- variants
100                 if( !$this->mPreferredVariant ) {
101                         // see if some supported language variant is set in the
102                         // http header, but we don't set the mPreferredVariant
103                         // variable in case this is called before the user's
104                         // preference is loaded
105                         $pv=$this->mMainLanguageCode;
106                         if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
107                                 $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
108                                 $zh = strstr($header, 'zh-');
109                                 if($zh) {
110                                         $pv = substr($zh,0,5);
111                                 }
112                         }
113                         return $pv;
114                 }
115         }
116
117         /**
118      * dictionary-based conversion
119      *
120      * @param string $text the text to be converted
121      * @param string $toVariant the target language code
122      * @return string the converted text
123      * @access private
124      */
125         function autoConvert($text, $toVariant=false) {
126                 $fname="LanguageConverter::autoConvert";
127
128                 wfProfileIn( $fname );
129
130                 if(!$this->mTablesLoaded)
131                         $this->loadTables();
132
133                 if(!$toVariant)
134                         $toVariant = $this->getPreferredVariant();
135                 if(!in_array($toVariant, $this->mVariants))
136                         return $text;
137
138                 /* we convert everything except:
139                    1. html markups (anything between < and >)
140                    2. html entities
141                    3. place holders created by the parser
142                 */
143                 global $wgParser;
144                 if (isset($wgParser))
145                         $marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
146                 else
147                         $marker = "";
148                 $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . '/';
149                 $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
150
151
152                 $m = array_shift($matches);
153                 $ret = strtr($m[0], $this->mTables[$toVariant]);
154                 $mstart = $m[1]+strlen($m[0]);
155                 foreach($matches as $m) {
156                         $ret .= substr($text, $mstart, $m[1]-$mstart);
157                         $ret .= strtr($m[0], $this->mTables[$toVariant]);
158                         $mstart = $m[1] + strlen($m[0]);
159                 }
160                 wfProfileOut( $fname );
161                 return $ret;
162         }
163
164         /**
165      * convert text to all supported variants
166      *
167      * @param string $text the text to be converted
168      * @return array of string
169      * @access private
170      */
171         function autoConvertToAllVariants($text) {
172                 $fname="LanguageConverter::autoConvertToAllVariants";
173                 wfProfileIn( $fname );
174                 if( !$this->mTablesLoaded )
175                         $this->loadTables();
176
177                 $ret = array();
178                 foreach($this->mVariants as $variant) {
179                         $ret[$variant] = strtr($text, $this->mTables[$variant]);
180                 }
181                 wfProfileOut( $fname );
182                 return $ret;
183         }
184
185         /**
186          * convert text to different variants of a language. the automatic
187          * conversion is done in autoConvert(). here we parse the text
188          * marked with -{}-, which specifies special conversions of the
189          * text that can not be accomplished in autoConvert()
190          *
191          * syntax of the markup:
192          * -{code1:text1;code2:text2;...}-  or
193          * -{text}- in which case no conversion should take place for text
194      *
195      * @param string $text text to be converted
196      * @param bool $isTitle whether this conversion is for the article title
197      * @return string converted text
198      * @access public
199      */
200         function convert( $text , $isTitle=false) {
201                 global $wgDisableLangConversion;
202                 global $wgTitle;
203
204                 /* don't do anything if this is the conversion table */
205                 if($wgTitle->getNamespace() == NS_MEDIAWIKI &&
206                    strpos($wgTitle->getText(), "Conversiontable")!==false)
207                         return $text;
208
209                 if($wgDisableLangConversion)
210                         return $text;
211
212                 $mw =& MagicWord::get( MAG_NOTITLECONVERT );
213                 if( $mw->matchAndRemove( $text ) )
214                         $this->mDoTitleConvert = false;
215
216                 $mw =& MagicWord::get( MAG_NOCONTENTCONVERT );
217                 if( $mw->matchAndRemove( $text ) ) {
218                         $this->mDoContentConvert = false;
219                 }
220
221                 // no conversion if redirecting
222                 $mw =& MagicWord::get( MAG_REDIRECT );
223                 if( $mw->matchStart( $text ))
224                         return $text;
225
226                 if( $isTitle ) {
227                         if( !$this->mDoTitleConvert ) {
228                                 $this->mTitleDisplay = $text;
229                                 return $text;
230                         }
231                         if( !empty($this->mTitleDisplay))
232                                 return $this->mTitleDisplay;
233
234                         global $wgRequest;
235                         $isredir = $wgRequest->getText( 'redirect', 'yes' );
236                         $action = $wgRequest->getText( 'action' );
237                         if ( $isredir == 'no' || $action == 'edit' ) {
238                                 return $text;
239                         }
240                         else {
241                                 $this->mTitleDisplay = $this->autoConvert($text);
242                                 return $this->mTitleDisplay;
243                         }
244                 }
245
246                 if( !$this->mDoContentConvert )
247                         return $text;
248
249                 $plang = $this->getPreferredVariant();
250                 $fallback = $this->mVariantFallbacks[$plang];
251
252                 $tarray = explode($this->mMarkup['begin'], $text);
253                 $tfirst = array_shift($tarray);
254                 $text = $this->autoConvert($tfirst);
255                 foreach($tarray as $txt) {
256                         $marked = explode($this->mMarkup['end'], $txt);
257                         $flags = array();
258                         $tt = explode($this->mMarkup['flagsep'], $marked[0], 2);
259
260                         if(sizeof($tt) == 2) {
261                                 $f = explode($this->mMarkup['varsep'], $tt[0]);
262                                 foreach($f as $ff) {
263                                         $ff = trim($ff);
264                                         if(array_key_exists($ff, $this->mFlags) &&
265                                                 !array_key_exists($this->mFlags[$ff], $flags))
266                                                 $flags[] = $this->mFlags[$ff];
267                                 }
268                                 $rules = $tt[1];
269                         }
270                         else
271                                 $rules = $marked[0];
272
273 #FIXME: may cause trouble here...
274                         //strip &nbsp; since it interferes with the parsing, plus,
275                         //all spaces should be stripped in this tag anyway.
276                         $rules = str_replace('&nbsp;', '', $rules);
277
278                         $carray = $this->parseManualRule($rules, $flags);
279                         $disp = '';
280                         if(array_key_exists($plang, $carray))
281                                 $disp = $carray[$plang];
282                         else if(array_key_exists($fallback, $carray))
283                                 $disp = $carray[$fallback];
284                         if($disp) {
285                                 if(in_array('T',  $flags))
286                                         $this->mTitleDisplay = $disp;
287                                 else
288                                         $text .= $disp;
289
290                                 if(in_array('A', $flags)) {
291                                         /* modify the conversion table for this session*/
292
293                                         /* fill in the missing variants, if any,
294                                             with fallbacks */
295                                         foreach($this->mVariants as $v) {
296                                                 if(!array_key_exists($v, $carray)) {
297                                                         $vf = $this->getVariantFallback($v);
298                                                         if(array_key_exists($vf, $carray))
299                                                                 $carray[$v] = $carray[$vf];
300                                                 }
301                                         }
302
303                                         foreach($this->mVariants as $vfrom) {
304                                                 if(!array_key_exists($vfrom, $carray))
305                                                         continue;
306                                                 foreach($this->mVariants as $vto) {
307                                                         if($vfrom == $vto)
308                                                                 continue;
309                                                         if(!array_key_exists($vto, $carray))
310                                                                 continue;
311                                                         $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
312
313                                                 }
314                                         }
315                                 }
316                         }
317                         else {
318                                 $text .= $marked[0];
319                         }
320                         if(array_key_exists(1, $marked))
321                                 $text .= $this->autoConvert($marked[1]);
322                 }
323
324                 return $text;
325         }
326
327         /**
328          * parse the manually marked conversion rule
329          * @param string $rule the text of the rule
330          * @return array of the translation in each variant
331          * @access private
332          */
333         function parseManualRule($rules, $flags=array()) {
334
335                 $choice = explode($this->mMarkup['varsep'], $rules);
336                 $carray = array();
337                 if(sizeof($choice) == 1) {
338                         /* a single choice */
339                         foreach($this->mVariants as $v)
340                                 $carray[$v] = $choice[0];
341                 }
342                 else {
343                         foreach($choice as $c) {
344                                 $v = explode($this->mMarkup['codesep'], $c);
345                                 if(sizeof($v) != 2) // syntax error, skip
346                                         continue;
347                                 $carray[trim($v[0])] = trim($v[1]);
348                         }
349                 }
350                 return $carray;
351         }
352
353         /**
354          * if a language supports multiple variants, it is
355          * possible that non-existing link in one variant
356          * actually exists in another variant. this function
357          * tries to find it. See e.g. LanguageZh.php
358          *
359          * @param string $link the name of the link
360          * @param mixed $nt the title object of the link
361          * @return null the input parameters may be modified upon return
362      * @access public
363          */
364         function findVariantLink( &$link, &$nt ) {
365                 static $count=0; //used to limit this operation
366                 static $cache=array();
367                 global $wgDisableLangConversion;
368                 $pref = $this->getPreferredVariant();
369                 $ns=0;
370                 if(is_object($nt))
371                         $ns = $nt->getNamespace();
372                 if( $count > 50 && $ns != NS_CATEGORY )
373                         return;
374                 $count++;
375                 $variants = $this->autoConvertToAllVariants($link);
376                 if($variants == false) //give up
377                         return;
378                 foreach( $variants as $v ) {
379                         if(isset($cache[$v]))
380                                 continue;
381                         $cache[$v] = 1;
382                         $varnt = Title::newFromText( $v );
383                         if( $varnt && $varnt->getArticleID() > 0 ) {
384                                 $nt = $varnt;
385                                 if( !$wgDisableLangConversion )
386                                         $link = $v;
387                                 break;
388                         }
389                 }
390         }
391
392     /**
393      * returns language specific hash options
394      *
395      * @access public
396      */
397         function getExtraHashOptions() {
398                 $variant = $this->getPreferredVariant();
399                 return '!' . $variant ;
400         }
401
402     /**
403      * get title text as defined in the body of the article text
404      *
405      * @access public
406      */
407         function getParsedTitle() {
408                 return $this->mTitleDisplay;
409         }
410
411         /**
412      * a write lock to the cache
413      *
414      * @access private
415      */
416         function lockCache() {
417                 global $wgMemc;
418                 $success = false;
419                 for($i=0; $i<30; $i++) {
420                         if($success = $wgMemc->add($this->mCacheKey . "lock", 1, 10))
421                                 break;
422                         sleep(1);
423                 }
424                 return $success;
425         }
426
427         /**
428      * unlock cache
429      *
430      * @access private
431      */
432         function unlockCache() {
433                 global $wgMemc;
434                 $wgMemc->delete($this->mCacheKey . "lock");
435         }
436
437
438         /**
439      * Load default conversion tables
440      * This method must be implemented in derived class
441      *
442      * @access private
443      */
444         function loadDefaultTables() {
445                 $name = get_class($this);
446                 die("Must implement loadDefaultTables() method in class $name");
447         }
448
449         /**
450      * load conversion tables either from the cache or the disk
451      * @access private
452      */
453         function loadTables($fromcache=true) {
454                 global $wgMemc;
455                 if( $this->mTablesLoaded )
456                         return;
457                 $this->mTablesLoaded = true;
458                 if($fromcache) {
459                         $this->mTables = $wgMemc->get( $this->mCacheKey );
460                         if( !empty( $this->mTables ) ) //all done
461                                 return;
462                 }
463                 // not in cache, or we need a fresh reload.
464                 // we will first load the default tables
465                 // then update them using things in MediaWiki:Zhconversiontable/*
466                 global $wgMessageCache;
467                 $this->loadDefaultTables();
468                 foreach($this->mVariants as $var) {
469                         $cached = $this->parseCachedTable($var);
470                         $this->mTables[$var] = array_merge($this->mTables[$var], $cached);
471                 }
472
473                 $this->postLoadTables();
474
475                 if($this->lockCache()) {
476                         $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
477                         $this->unlockCache();
478                 }
479         }
480
481     /**
482      * Hook for post processig after conversion tables are loaded
483      *
484      */
485         function postLoadTables() {}
486
487     /**
488      * Reload the conversion tables
489      *
490      * @access private
491      */
492         function reloadTables() {
493                 if($this->mTables)
494                         unset($this->mTables);
495                 $this->mTablesLoaded = false;
496                 $this->loadTables(false);
497         }
498
499
500         /**
501      * parse the conversion table stored in the cache
502      *
503      * the tables should be in blocks of the following form:
504
505      *          -{
506      *                  word => word ;
507      *                  word => word ;
508      *                  ...
509      *          }-
510      *
511      *  to make the tables more manageable, subpages are allowed
512      *  and will be parsed recursively if $recursive=true
513      *
514      * @access private
515          */
516         function parseCachedTable($code, $subpage='', $recursive=true) {
517                 global $wgMessageCache;
518                 static $parsed = array();
519
520                 if(!is_object($wgMessageCache))
521                         return array();
522
523                 $key = 'Conversiontable/'.$code;
524                 if($subpage)
525                         $key .= '/' . $subpage;
526
527                 if(array_key_exists($key, $parsed))
528                         return array();
529
530
531                 $txt = $wgMessageCache->get( $key, true, true, true );
532
533                 // get all subpage links of the form
534                 // [[MediaWiki:conversiontable/zh-xx/...|...]]
535                 $linkhead = $this->mLangObj->getNsText(NS_MEDIAWIKI) . ':Conversiontable';
536                 $subs = explode('[[', $txt);
537                 $sublinks = array();
538                 foreach( $subs as $sub ) {
539                         $link = explode(']]', $sub, 2);
540                         if(count($link) != 2)
541                                 continue;
542                         $b = explode('|', $link[0]);
543                         $b = explode('/', trim($b[0]), 3);
544                         if(count($b)==3)
545                                 $sublink = $b[2];
546                         else
547                                 $sublink = '';
548
549                         if($b[0] == $linkhead && $b[1] == $code) {
550                                 $sublinks[] = $sublink;
551                         }
552                 }
553
554
555                 // parse the mappings in this page
556                 $blocks = explode($this->mMarkup['begin'], $txt);
557                 array_shift($blocks);
558                 $ret = array();
559                 foreach($blocks as $block) {
560                         $mappings = explode($this->mMarkup['end'], $block, 2);
561                         $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
562                         $table = explode( ';', $stripped );
563                         foreach( $table as $t ) {
564                                 $m = explode( '=>', $t );
565                                 if( count( $m ) != 2)
566                                         continue;
567                                 // trim any trailling comments starting with '//'
568                                 $tt = explode('//', $m[1], 2);
569                                 $ret[trim($m[0])] = trim($tt[0]);
570                         }
571                 }
572                 $parsed[$key] = true;
573
574
575                 // recursively parse the subpages
576                 if($recursive) {
577                         foreach($sublinks as $link) {
578                                 $s = $this->parseCachedTable($code, $link, $recursive);
579                                 $ret = array_merge($ret, $s);
580                         }
581                 }
582
583                 if ($this->mUcfirst) {
584                         foreach ($ret as $k => $v) {
585                                 $ret[LanguageUtf8::ucfirst($k)] = LanguageUtf8::ucfirst($v);
586                         }
587                 }
588                 return $ret;
589         }
590
591         /**
592          * Enclose a string with the "no conversion" tag. This is used by
593          * various functions in the Parser
594          *
595          * @param string $text text to be tagged for no conversion
596          * @return string the tagged text
597         */
598         function markNoConversion($text) {
599                 # don't mark if already marked
600                 if(strpos($text, $this->mMarkup['begin']) ||
601                    strpos($text, $this->mMarkup['end']))
602                         return $text;
603
604                 $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end'];
605                 return $ret;
606         }
607
608         /**
609          * convert the sorting key for category links. this should make different
610          * keys that are variants of each other map to the same key
611         */
612         function convertCategoryKey( $key ) {
613                 return $key;
614         }
615         /**
616      * hook to refresh the cache of conversion tables when
617      * MediaWiki:conversiontable* is updated
618      * @access private
619         */
620         function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
621                 $titleobj = $article->getTitle();
622                 if($titleobj->getNamespace() == NS_MEDIAWIKI) {
623             /*
624                         global $wgContLang; // should be an LanguageZh.
625                         if(get_class($wgContLang) != 'languagezh')
626                                 return true;
627             */
628                         $title = $titleobj->getDBkey();
629                         $t = explode('/', $title, 3);
630                         $c = count($t);
631                         if( $c > 1 && $t[0] == 'Conversiontable' ) {
632                                 if(in_array($t[1], $this->mVariants)) {
633                                         $this->reloadTables();
634                                 }
635                         }
636                 }
637                 return true;
638         }
639 }
640
641 ?>