]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - languages/classes/LanguageEo.php
MediaWiki 1.17.0
[autoinstalls/mediawiki.git] / languages / classes / LanguageEo.php
1 <?php
2
/**
 * Esperanto (Esperanto)
 *
 * Extends the base Language class with the "X-system" pseudo-charset, an
 * ASCII-friendly surrogate spelling of the circumflexed Esperanto letters,
 * and selects that pseudo-charset as the edit encoding.
 *
 * @ingroup Language
 * @author Brion Vibber <brion@pobox.com>
 */
class LanguageEo extends Language {
	/**
	 * Wrapper for charset conversions.
	 *
	 * In most languages, this calls through to standard system iconv(), but
	 * for Esperanto we're also adding a special pseudo-charset to convert
	 * accented characters to/from the ASCII-friendly "X" surrogate coding:
	 *
	 *     cx = ĉ     cxx = cx
	 *     gx = ĝ     gxx = gx
	 *     hx = ĥ     hxx = hx
	 *     jx = ĵ     jxx = jx
	 *     sx = ŝ     sxx = sx
	 *     ux = ŭ     uxx = ux
	 *
	 * An "x" is only special when it directly follows one of c, g, h, j, s, u
	 * (either case) or, when encoding, one of their circumflexed forms. In
	 * such a run every literal "x" is written as "xx"; an "x" anywhere else
	 * in the text is left untouched in both directions.
	 *
	 *   http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
	 *   http://eo.wikipedia.org/wiki/X-sistemo
	 *
	 * X-conversion is applied, in either direction, between "utf-8" and "x" charsets;
	 * this comes into effect when input is run through $wgRequest->getText() and the
	 * $wgEditEncoding is set to 'x'. Charset names are compared case-insensitively.
	 *
	 * In the long run, this should be moved out of here and into the client-side
	 * editor behavior; the original server-side translation system dates to 2002-2003
	 * when many browsers with really bad Unicode support were still in use.
	 *
	 * @param string $in input character set
	 * @param string $out output character set
	 * @param string $string text to be converted
	 * @return string
	 */
	public function iconv( $in, $out, $string ) {
		if ( strcasecmp( $in, 'x' ) === 0 && strcasecmp( $out, 'utf-8' ) === 0 ) {
			# Group 1: base letter plus an optional marker "x".
			# Group 2: a run of escaped "xx" pairs.
			# The negative lookahead forces the whole x-run to be consumed,
			# so backtracking decides whether the first "x" is a marker
			# (odd run length) or part of an escaped pair (even run length).
			return preg_replace_callback(
				'/([cghjsu]x?)((?:xx)*)(?!x)/i',
				array( $this, 'strrtxuCallback' ),
				$string
			);
		}

		if ( strcasecmp( $in, 'UTF-8' ) === 0 && strcasecmp( $out, 'x' ) === 0 ) {
			# Double Xs only if they follow cxapelutaj literoj.
			# The pattern works on raw UTF-8 bytes (no /u modifier): the
			# \xc4.. and \xc5.. alternatives are the two-byte encodings of
			# the circumflexed letters listed in strrtuxCallback().
			return preg_replace_callback(
				'/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i',
				array( $this, 'strrtuxCallback' ),
				$string
			);
		}

		# Any other charset pair is handled by the parent implementation.
		return parent::iconv( $in, $out, $string );
	}

	/**
	 * preg_replace_callback() handler for the UTF-8 => X-system direction.
	 *
	 * Receives one letter (plain or circumflexed) together with the run of
	 * "x" characters that follows it. Circumflexed letters become the base
	 * letter plus "x"; every literal "x"/"X" in the run is doubled so that
	 * it cannot be mistaken for a marker when decoding.
	 *
	 * @param array $matches regex match; only $matches[1] is used
	 * @return string X-system replacement text
	 */
	public function strrtuxCallback( $matches ) {
		static $utf8ToX = array(
			'x' => 'xx', 'X' => 'Xx',
			"\xc4\x88" => "Cx", "\xc4\x89" => "cx",
			"\xc4\x9c" => "Gx", "\xc4\x9d" => "gx",
			"\xc4\xa4" => "Hx", "\xc4\xa5" => "hx",
			"\xc4\xb4" => "Jx", "\xc4\xb5" => "jx",
			"\xc5\x9c" => "Sx", "\xc5\x9d" => "sx",
			"\xc5\xac" => "Ux", "\xc5\xad" => "ux"
		);

		return strtr( $matches[1], $utf8ToX );
	}

	/**
	 * preg_replace_callback() handler for the X-system => UTF-8 direction.
	 *
	 * $matches[1] is a base letter with an optional marker "x"; when the
	 * marker is present the pair is replaced by the circumflexed letter,
	 * otherwise the lone letter has no table entry and is kept as is.
	 * $matches[2] is the following run of doubled x's; each pair collapses
	 * to a single "x" or "X", taking the case of the pair's first character.
	 *
	 * @param array $matches regex match; $matches[1] and $matches[2] are used
	 * @return string UTF-8 replacement text
	 */
	public function strrtxuCallback( $matches ) {
		static $xToUtf8 = array(
			'xx' => 'x', 'xX' => 'x',
			'Xx' => 'X', 'XX' => 'X',
			"Cx" => "\xc4\x88", "CX" => "\xc4\x88",
			"cx" => "\xc4\x89", "cX" => "\xc4\x89",
			"Gx" => "\xc4\x9c", "GX" => "\xc4\x9c",
			"gx" => "\xc4\x9d", "gX" => "\xc4\x9d",
			"Hx" => "\xc4\xa4", "HX" => "\xc4\xa4",
			"hx" => "\xc4\xa5", "hX" => "\xc4\xa5",
			"Jx" => "\xc4\xb4", "JX" => "\xc4\xb4",
			"jx" => "\xc4\xb5", "jX" => "\xc4\xb5",
			"Sx" => "\xc5\x9c", "SX" => "\xc5\x9c",
			"sx" => "\xc5\x9d", "sX" => "\xc5\x9d",
			"Ux" => "\xc5\xac", "UX" => "\xc5\xac",
			"ux" => "\xc5\xad", "uX" => "\xc5\xad"
		);

		$letter = strtr( $matches[1], $xToUtf8 );
		$literalXs = strtr( $matches[2], $xToUtf8 );

		return $letter . $literalXs;
	}

	/**
	 * Normalise the byte encoding of a title string.
	 *
	 * A string that contains bytes >= 0x80 but does not look like UTF-8 is
	 * assumed to be Latin-1 and is converted to UTF-8. Everything else,
	 * including titles spelled in the X-system, is returned unchanged: no
	 * X-system decoding is performed here.
	 *
	 * @param string $s title text as received
	 * @return string title text, re-encoded only in the Latin-1 case
	 */
	public function checkTitleEncoding( $s ) {
		$hasHighBytes = preg_match( '/[\x80-\xff]/', $s );
		# Structural check only: lead byte followed by the right number of
		# continuation bytes, for sequences of up to four bytes.
		$looksLikeUtf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );

		if ( $hasHighBytes && !$looksLikeUtf8 ) {
			# Assume Latin1
			# NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it
			# will need a replacement if this code is run on newer PHP.
			return utf8_encode( $s );
		}

		return $s;
	}

	/**
	 * Switch the wiki's edit encoding to the X-system pseudo-charset by
	 * setting the global $wgEditEncoding to 'x'.
	 */
	public function initEncoding() {
		global $wgEditEncoding;
		$wgEditEncoding = 'x';
	}
}