]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - languages/ConverterRule.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / languages / ConverterRule.php
1 <?php
2 /**
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation; either version 2 of the License, or
6  * (at your option) any later version.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License along
14  * with this program; if not, write to the Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16  * http://www.gnu.org/copyleft/gpl.html
17  *
18  * @file
19  * @ingroup Language
20  */
21
22 /**
23  * Parser for language conversion rules; parses the rules inside a -{ }- tag.
24  * @ingroup Language
25  * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
26  */
27 class ConverterRule {
/** @var string Original text in -{text}- */
public $mText;

/** @var LanguageConverter Converter object consulted while parsing */
public $mConverter;

/** @var string|bool Text to display in place of the markup; parse() sets it */
public $mRuleDisplay = '';

/** @var string|bool Converted title, assigned when the T flag is processed; false until then */
public $mRuleTitle = false;

/** @var string The text of the rules */
public $mRules = '';

/** @var string 'none' by default; parse() changes it to 'add' or 'remove' for the + and - flags */
public $mRulesAction = 'none';

/** @var array Normalised flags, stored as flag => true */
public $mFlags = [];

/** @var array Variant codes that were given as flags, stored as array keys */
public $mVariantFlags = [];

/** @var array Conversion table: variant code => [ from => to ] */
public $mConvTable = [];

/** @var array Bidirectional rules: the translation in each variant (variant code => text) */
public $mBidtable = [];

/** @var array Unidirectional rules: variant code => [ from => to ] */
public $mUnidtable = [];
39
/**
 * Remember the markup text and the converter it belongs to.
 *
 * Nothing is parsed here; the text is only processed once parse() is called.
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter
 */
public function __construct( $text, $converter ) {
        $this->mConverter = $converter;
        $this->mText = $text;
}
48
/**
 * Find the text of the first given variant that has an entry in the
 * bidirectional table.
 *
 * @param array|string $variants Variant language code, or a list of codes
 *   that are tried in the given order
 * @return string|bool Text stored for the first matching variant, or false
 *   when none of the variants has an entry
 */
public function getTextInBidtable( $variants ) {
        foreach ( (array)$variants as $code ) {
                if ( isset( $this->mBidtable[$code] ) ) {
                        return $this->mBidtable[$code];
                }
        }
        // No variant matched; an empty list of variants ends up here as well.
        return false;
}
67
/**
 * Split the markup text into flags and rules, syntax -{FLAG;FLAG| ... }-
 *
 * Everything before the first "|" is read as a ";"-separated list of flags;
 * names the converter does not know are dropped. The recognised flags are
 * normalised and stored in $this->mFlags, the text after the "|" (or the
 * whole text when there is no "|") in $this->mRules, and variant codes that
 * were used as flags in $this->mVariantFlags.
 *
 * @private
 */
function parseFlags() {
        $body = $this->mText;
        $found = [];
        $variantFlags = [];

        $pipeAt = strpos( $body, '|' );
        if ( $pipeAt !== false ) {
                $known = $this->mConverter->mFlags;
                $names = StringUtils::explode( ';', substr( $body, 0, $pipeAt ) );
                foreach ( $names as $name ) {
                        $name = trim( $name );
                        if ( isset( $known[$name] ) ) {
                                $found[$known[$name]] = true;
                        }
                }
                $body = strval( substr( $body, $pipeAt + 1 ) );
        }

        // R, N and - remove every other flag; the first one present, in this order, wins.
        $exclusive = false;
        foreach ( [ 'R', 'N', '-' ] as $candidate ) {
                if ( isset( $found[$candidate] ) ) {
                        $exclusive = $candidate;
                        break;
                }
        }

        if ( !$found ) {
                // no (valid) flag at all
                $found = [ 'S' => true ];
        } elseif ( $exclusive !== false ) {
                $found = [ $exclusive => true ];
        } elseif ( count( $found ) == 1 && isset( $found['T'] ) ) {
                // T on its own also gets H
                $found['H'] = true;
        } elseif ( isset( $found['H'] ) ) {
                // replace A flag, and remove other flags except T and D
                $kept = [ '+' => true, 'H' => true ];
                foreach ( [ 'T', 'D' ] as $optional ) {
                        if ( isset( $found[$optional] ) ) {
                                $kept[$optional] = true;
                        }
                }
                $found = $kept;
        } else {
                if ( isset( $found['A'] ) ) {
                        $found['+'] = true;
                        $found['S'] = true;
                }
                if ( isset( $found['D'] ) ) {
                        unset( $found['S'] );
                }
                // look for flags that are variant codes, to allow
                // syntaxes like "-{zh-hans;zh-hant|XXXX}-"
                $matched = array_intersect( array_keys( $found ), $this->mConverter->mVariants );
                if ( $matched ) {
                        $variantFlags = array_flip( $matched );
                        $found = [];
                }
        }

        $this->mFlags = $found;
        $this->mRules = $body;
        $this->mVariantFlags = $variantFlags;
}
130
/**
 * Generate the bidirectional and unidirectional tables from $this->mRules.
 *
 * The rules text is split with the converter's variant separator pattern.
 * Each piece is expected to look like "variant:text" (stored in the
 * bidirectional table) or "from=>variant:text" (stored in the unidirectional
 * table). A piece without ":" is skipped; a piece naming a variant that has
 * no entry in the converter's mVariantNames discards all rules parsed so far
 * and stops the parsing.
 *
 * @private
 */
function parseRules() {
        $bidtable = [];
        $unidtable = [];
        $variants = $this->mConverter->mVariants;
        $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

        // Split according to $varsep_pattern, but ignore semicolons from HTML entities:
        // they are swapped for \x01 during the split and restored afterwards.
        $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $this->mRules );
        $choice = preg_split( $varsep_pattern, $rules );
        if ( $choice === false ) {
                // The split itself failed, so there is nothing to iterate over;
                // handle it like text without any rule.
                $choice = [];
        }
        $choice = str_replace( "\x01", ';', $choice );

        foreach ( $choice as $c ) {
                $v = explode( ':', $c, 2 );
                if ( count( $v ) != 2 ) {
                        // syntax error, skip
                        continue;
                }
                $to = trim( $v[1] );
                $v = trim( $v[0] );
                $u = explode( '=>', $v, 2 );
                if ( count( $u ) == 2 ) {
                        // unidirectional rule "from=>variant:to"
                        $from = trim( $u[0] );
                        $v = trim( $u[1] );
                        // if $from is empty, strtr() could return a wrong result.
                        if ( $from !== '' && in_array( $v, $variants, true ) ) {
                                $unidtable[$v][$from] = $to;
                        }
                } elseif ( $to !== '' && in_array( $v, $variants, true ) ) {
                        // bidirectional rule "variant:to"
                        // if $to is empty (which is also used as $from in bidtable),
                        // strtr() could return a wrong result.
                        $bidtable[$v] = $to;
                }
                // syntax error, pass: an unknown variant invalidates the whole rule set
                if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
                        $bidtable = [];
                        $unidtable = [];
                        break;
                }
        }
        $this->mBidtable = $bidtable;
        $this->mUnidtable = $unidtable;
}
183
/**
 * Build a textual description of the parsed rules.
 *
 * Every bidirectional rule is written as "<variant name><codesep><text><varsep>"
 * and every unidirectional rule as "<from>⇒<variant name><codesep><to><varsep>",
 * using the converter's mDescCodeSep and mDescVarSep separators.
 *
 * @private
 *
 * @return string
 */
function getRulesDesc() {
        $converter = $this->mConverter;
        $codesep = $converter->mDescCodeSep;
        $varsep = $converter->mDescVarSep;

        $pieces = [];
        foreach ( $this->mBidtable as $code => $shown ) {
                $pieces[] = $converter->mVariantNames[$code] . $codesep . $shown . $varsep;
        }
        foreach ( $this->mUnidtable as $code => $pairs ) {
                foreach ( $pairs as $from => $to ) {
                        $pieces[] = $from . '⇒' . $converter->mVariantNames[$code]
                                . $codesep . $to . $varsep;
                }
        }
        return implode( '', $pieces );
}
204
/**
 * Pick the text to display for a variant from the parsed rules.
 *
 * Without any parsed rule the raw rules text is returned. Otherwise the
 * sources are tried in this order: the bidirectional entry of the variant,
 * the bidirectional entry of one of its fallbacks, the first unidirectional
 * entry of the variant and, only when the manual level of the variant is
 * 'disable', the very first text found in the tables.
 *
 * @private
 *
 * @param string $variant
 *
 * @return string|bool The text to display, or false when no source matched
 */
function getRuleConvertedStr( $variant ) {
        $bidi = $this->mBidtable;
        $unidi = $this->mUnidtable;

        if ( !$bidi && !$unidi ) {
                return $this->mRules;
        }

        // display current variant in bidirectional array
        $shown = $this->getTextInBidtable( $variant );
        if ( $shown === false ) {
                // or display current variant in fallbacks
                $fallbacks = $this->mConverter->getVariantFallbacks( $variant );
                $shown = $this->getTextInBidtable( $fallbacks );
        }
        if ( $shown === false && array_key_exists( $variant, $unidi ) ) {
                // or display current variant in unidirectional array
                $shown = array_values( $unidi[$variant] )[0];
        }
        if ( $shown === false && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
                // or display first text under disable manual convert
                $source = $bidi ? $bidi : array_values( $unidi )[0];
                $shown = array_values( $source )[0];
        }
        return $shown;
}
242
/**
 * Similar to getRuleConvertedStr(), but this prefers to use original
 * page title if $variant === $this->mConverter->mMainLanguageCode
 * and may return false in this case (so this title conversion rule
 * will be ignored and the original title is shown).
 *
 * @since 1.22
 * @param string $variant The variant code to display page title in
 * @return string|bool The converted title or false if just page name
 */
function getRuleConvertedTitle( $variant ) {
        if ( $variant !== $this->mConverter->mMainLanguageCode ) {
                return $this->getRuleConvertedStr( $variant );
        }

        // If a string targeting exactly this variant is set, use it.
        // The comparison is strict so that a title such as "0" is not
        // mistaken for "no entry" and overridden by a unidirectional rule.
        $disp = $this->getTextInBidtable( $variant );
        if ( $disp !== false ) {
                return $disp;
        }
        if ( array_key_exists( $variant, $this->mUnidtable ) ) {
                return array_values( $this->mUnidtable[$variant] )[0];
        }
        // Otherwise return false, so the real page name can be shown (and
        // because variant === main, there'll be no further automatic conversion).
        return false;
}
272
/**
 * Generate conversion table for all text.
 *
 * Walks the converter's variants in order. Variants missing from the
 * bidirectional table are filled in from their fallbacks; every pair of
 * variants that both have a text then gets a mapping in each direction whose
 * target variant has manual level 'bidirectional'. The unidirectional rules
 * of a variant are merged in when its manual level is 'bidirectional' or
 * 'unidirectional', taking precedence over the bidirectional mappings.
 *
 * Entries are added to $this->mConvTable; it is only reset when there are
 * no rules at all.
 *
 * @private
 */
function generateConvTable() {
        // Special case optimisation
        if ( !$this->mBidtable && !$this->mUnidtable ) {
                $this->mConvTable = [];
                return;
        }

        $bidtable = $this->mBidtable;
        $unidtable = $this->mUnidtable;
        $manLevel = $this->mConverter->mManualLevel;

        // variants processed so far that have a bidirectional text
        $vmarked = [];
        foreach ( $this->mConverter->mVariants as $v ) {
                /* for bidirectional array
                        fill in the missing variants, if any,
                        with fallbacks */
                if ( !isset( $bidtable[$v] ) ) {
                        $variantFallbacks =
                                $this->mConverter->getVariantFallbacks( $v );
                        $vf = $this->getTextInBidtable( $variantFallbacks );
                        // Compare against false explicitly: a plain truthiness test
                        // would also throw away the valid text "0". Empty text stays
                        // excluded because it must never become a conversion source.
                        if ( $vf !== false && $vf !== '' ) {
                                $bidtable[$v] = $vf;
                        }
                }

                if ( isset( $bidtable[$v] ) ) {
                        foreach ( $vmarked as $vo ) {
                                // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                                // or -{H|zh:WordZh;zh-tw:WordTw}-
                                // or -{-|zh:WordZh;zh-tw:WordTw}-
                                // to introduce a custom mapping between
                                // words WordZh and WordTw in the whole text
                                if ( $manLevel[$v] == 'bidirectional' ) {
                                        $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                                }
                                if ( $manLevel[$vo] == 'bidirectional' ) {
                                        $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                                }
                        }
                        $vmarked[] = $v;
                }
                /* for unidirectional array fill to convert tables */
                if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
                        && isset( $unidtable[$v] )
                ) {
                        if ( isset( $this->mConvTable[$v] ) ) {
                                // array union: the unidirectional entries win on equal keys
                                $this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
                        } else {
                                $this->mConvTable[$v] = $unidtable[$v];
                        }
                }
        }
}
330
/**
 * Parse the flags and rules of the markup and work out the display text,
 * the title, the rules action and the conversion table.
 *
 * @param string $variant Variant language code; the converter's preferred
 *   variant is used when this is empty
 */
public function parse( $variant = null ) {
        $converter = $this->mConverter;
        if ( !$variant ) {
                $variant = $converter->getPreferredVariant();
        }

        $this->parseFlags();
        $flags = $this->mFlags;

        // convert to specified variant
        // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
        if ( $this->mVariantFlags ) {
                // Try the current variant if it is named in the flags,
                // otherwise try its fallback variants one after another.
                $candidates = isset( $this->mVariantFlags[$variant] )
                        ? [ $variant ]
                        : $converter->getVariantFallbacks( $variant );
                if ( is_array( $candidates ) ) {
                        foreach ( $candidates as $candidate ) {
                                if ( isset( $this->mVariantFlags[$candidate] ) ) {
                                        // convert <text to convert> to the first variant that is in the flags
                                        $this->mRules = $converter->autoConvert( $this->mRules, $candidate );
                                        break;
                                }
                        }
                }
                $flags = [ 'R' => true ];
                $this->mFlags = $flags;
        }

        $skipRules = isset( $flags['R'] ) || isset( $flags['N'] );
        if ( !$skipRules ) {
                // decode => HTML entities modified by Sanitizer::removeHTMLtags
                $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
                $this->parseRules();
        }
        $rules = $this->mRules;

        $withoutTables = !$this->mBidtable && !$this->mUnidtable;
        if ( $withoutTables ) {
                if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
                        // fill all variants if text in -{A/H/-|text}- is non-empty but without rules
                        if ( $rules !== '' ) {
                                $this->mBidtable = array_fill_keys( $converter->mVariants, $rules );
                        }
                } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
                        $flags = [ 'R' => true ];
                        $this->mFlags = $flags;
                }
        }

        // Stays false when none of the flags below produces a display text.
        $this->mRuleDisplay = false;
        foreach ( array_keys( $flags ) as $flag ) {
                switch ( $flag ) {
                        case 'R':
                                // if we don't do content convert, still strip the -{}- tags
                                $this->mRuleDisplay = $rules;
                                break;
                        case 'N':
                                // process N flag: output current variant name
                                $code = trim( $rules );
                                $names = $converter->mVariantNames;
                                $this->mRuleDisplay = isset( $names[$code] ) ? $names[$code] : '';
                                break;
                        case 'D':
                                // process D flag: output rules description
                                $this->mRuleDisplay = $this->getRulesDesc();
                                break;
                        case 'H':
                                // process H,- flag or T only: output nothing
                                $this->mRuleDisplay = '';
                                break;
                        case '-':
                                $this->mRulesAction = 'remove';
                                $this->mRuleDisplay = '';
                                break;
                        case '+':
                                $this->mRulesAction = 'add';
                                $this->mRuleDisplay = '';
                                break;
                        case 'S':
                                $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
                                break;
                        case 'T':
                                $this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
                                $this->mRuleDisplay = '';
                                break;
                        default:
                                // ignore unknown flags (but see error case below)
                }
        }
        if ( $this->mRuleDisplay === false ) {
                $message = wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped();
                $this->mRuleDisplay = '<span class="error">' . $message . '</span>';
        }

        $this->generateConvTable();
}
443
/**
 * Tell whether the rules text is anything other than the empty string.
 *
 * @return bool
 */
public function hasRules() {
        return $this->mRules !== '';
}
451
/**
 * Get the text to display in place of the -{...}- markup.
 *
 * @return string The display text worked out by parse(); an empty string
 *   as long as parse() has not been called
 */
public function getDisplay() {
        return $this->mRuleDisplay;
}
459
/**
 * Get the converted title.
 *
 * @return string|bool The title assigned while processing the T flag;
 *   false while no title has been assigned
 */
public function getTitle() {
        return $this->mRuleTitle;
}
467
/**
 * Return how the conversion rules are to be dealt with.
 *
 * @return string 'none' by default; parse() sets 'add' for the + flag
 *   and 'remove' for the - flag
 */
public function getRulesAction() {
        return $this->mRulesAction;
}
475
/**
 * Get the conversion table built from both the bidirectional and the
 * unidirectional rules.
 *
 * @return array Variant code => [ from => to ]
 */
public function getConvTable() {
        return $this->mConvTable;
}
484
/**
 * Get the text of the conversion rules.
 *
 * @return string
 */
public function getRules() {
        return $this->mRules;
}
492
/**
 * Get the conversion flags.
 *
 * @return array Flags stored as flag => true
 */
public function getFlags() {
        return $this->mFlags;
}
500 }