scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/libs/CSSJanus.php
MediaWiki 1.17.4
[autoinstalls/mediawiki.git] / includes / libs / CSSJanus.php
1 <?php
2 /**
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License as published by
5  * the Free Software Foundation; either version 2 of the License, or
6  * (at your option) any later version.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License along
14  * with this program; if not, write to the Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16  * http://www.gnu.org/copyleft/gpl.html
17  *
18  */
19
20 /**
21  * This is a PHP port of CSSJanus, a utility that transforms CSS style sheets
22  * written for LTR to RTL.
23  *
24  * The original Python version of CSSJanus is Copyright 2008 by Google Inc. and
25  * is distributed under the Apache license.
26  *
27  * Original code: http://code.google.com/p/cssjanus/source/browse/trunk/cssjanus.py
28  * License of original code: http://code.google.com/p/cssjanus/source/browse/trunk/LICENSE
29  * @author Roan Kattouw
30  *
31  */
class CSSJanus {
	/**
	 * Regular expression fragments and complete patterns, keyed by name.
	 *
	 * Entries defined as null are built dynamically by buildPatterns(),
	 * because they are composed from other entries of this array.
	 */
	private static $patterns = array(
		'tmpToken' => '`TMP`',
		'nonAscii' => '[\200-\377]',
		// Four backslashes in PHP source yield two in the string, which PCRE
		// reads as ONE literal backslash followed by the hex-digit class.
		// (Two backslashes would yield "\[", i.e. an escaped literal bracket.)
		'unicode' => '(?:(?:\\\\[0-9a-f]{1,6})(?:\r\n|\s)?)',
		'num' => '(?:[0-9]*\.[0-9]+|[0-9]+)',
		'unit' => '(?:em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)',
		'body_selector' => 'body\s*{\s*',
		'direction' => 'direction\s*:\s*',
		'escape' => null,
		'nmstart' => null,
		'nmchar' => null,
		'ident' => null,
		'quantity' => null,
		'possibly_negative_quantity' => null,
		'color' => null,
		'url_special_chars' => '[!#$%&*-~]',
		'valid_after_uri_chars' => '[\'\"]?\s*',
		'url_chars' => null,
		'lookahead_not_open_brace' => null,
		'lookahead_not_closing_paren' => null,
		'lookahead_for_closing_paren' => null,
		'lookbehind_not_letter' => '(?<![a-zA-Z])',
		'chars_within_selector' => '[^\}]*?',
		'noflip_annotation' => '\/\*\s*@noflip\s*\*\/',
		'noflip_single' => null,
		'noflip_class' => null,
		'comment' => '/\/\*[^*]*\*+([^\/*][^*]*\*+)*\//',
		'direction_ltr' => null,
		'direction_rtl' => null,
		'left' => null,
		'right' => null,
		'left_in_url' => null,
		'right_in_url' => null,
		'ltr_in_url' => null,
		'rtl_in_url' => null,
		'cursor_east' => null,
		'cursor_west' => null,
		'four_notation_quantity' => null,
		'four_notation_color' => null,
		'bg_horizontal_percentage' => null,
		'bg_horizontal_percentage_x' => null,
	);

	/**
	 * Build patterns we can't define above because they depend on other patterns.
	 *
	 * The work is done only once: if 'escape' is already set, the method
	 * returns immediately.
	 */
	private static function buildPatterns() {
		if ( self::$patterns['escape'] !== null ) {
			// Patterns have already been built
			return;
		}

		$patterns =& self::$patterns;
		// "\\\\" in a double-quoted string is two backslashes, i.e. one literal
		// backslash for PCRE, followed by a negated character class. This is the
		// CSS "escape" production: {unicode}|\\[^\r\n\f0-9a-f]
		$patterns['escape'] = "(?:{$patterns['unicode']}|\\\\[^\r\n\f0-9a-f])";
		$patterns['nmstart'] = "(?:[_a-z]|{$patterns['nonAscii']}|{$patterns['escape']})";
		$patterns['nmchar'] = "(?:[_a-z0-9-]|{$patterns['nonAscii']}|{$patterns['escape']})";
		$patterns['ident'] = "-?{$patterns['nmstart']}{$patterns['nmchar']}*";
		$patterns['quantity'] = "{$patterns['num']}(?:\s*{$patterns['unit']}|{$patterns['ident']})?";
		$patterns['possibly_negative_quantity'] = "((?:-?{$patterns['quantity']})|(?:inherit|auto))";
		$patterns['color'] = "(#?{$patterns['nmchar']}+)";
		$patterns['url_chars'] = "(?:{$patterns['url_special_chars']}|{$patterns['nonAscii']}|{$patterns['escape']})*";
		$patterns['lookahead_not_open_brace'] = "(?!({$patterns['nmchar']}|\r?\n|\s|#|\:|\.|\,|\+|>)*?{)";
		$patterns['lookahead_not_closing_paren'] = "(?!{$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";
		$patterns['lookahead_for_closing_paren'] = "(?={$patterns['url_chars']}?{$patterns['valid_after_uri_chars']}\))";
		$patterns['noflip_single'] = "/({$patterns['noflip_annotation']}{$patterns['lookahead_not_open_brace']}[^;}]+;?)/i";
		$patterns['noflip_class'] = "/({$patterns['noflip_annotation']}{$patterns['chars_within_selector']}})/i";
		$patterns['direction_ltr'] = "/({$patterns['direction']})ltr/i";
		$patterns['direction_rtl'] = "/({$patterns['direction']})rtl/i";
		$patterns['left'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
		$patterns['right'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_not_closing_paren']}{$patterns['lookahead_not_open_brace']}/i";
		$patterns['left_in_url'] = "/{$patterns['lookbehind_not_letter']}(left){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['right_in_url'] = "/{$patterns['lookbehind_not_letter']}(right){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['ltr_in_url'] = "/{$patterns['lookbehind_not_letter']}(ltr){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['rtl_in_url'] = "/{$patterns['lookbehind_not_letter']}(rtl){$patterns['lookahead_for_closing_paren']}/i";
		$patterns['cursor_east'] = "/{$patterns['lookbehind_not_letter']}([ns]?)e-resize/";
		$patterns['cursor_west'] = "/{$patterns['lookbehind_not_letter']}([ns]?)w-resize/";
		$patterns['four_notation_quantity'] = "/{$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}(\s+){$patterns['possibly_negative_quantity']}/i";
		$patterns['four_notation_color'] = "/(-color\s*:\s*){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}(\s+){$patterns['color']}/i";
		// The two regexes below are parenthesized differently than in the original implementation to make the
		// callback's job more straightforward
		$patterns['bg_horizontal_percentage'] = "/(background(?:-position)?\s*:\s*[^%]*?)({$patterns['num']})(%\s*(?:{$patterns['quantity']}|{$patterns['ident']}))/";
		$patterns['bg_horizontal_percentage_x'] = "/(background-position-x\s*:\s*)({$patterns['num']})(%)/";
	}

	/**
	 * Transform an LTR stylesheet to RTL
	 * @param $css String: stylesheet to transform
	 * @param $swapLtrRtlInURL Boolean: If true, swap 'ltr' and 'rtl' in URLs
	 * @param $swapLeftRightInURL Boolean: If true, swap 'left' and 'right' in URLs
	 * @return String: Transformed stylesheet
	 */
	public static function transform( $css, $swapLtrRtlInURL = false, $swapLeftRightInURL = false ) {
		// We wrap tokens in ` , not ~ like the original implementation does.
		// This was done because ` is not a legal character in CSS and can only
		// occur in URLs, where we escape it to %60 before inserting our tokens.
		$css = str_replace( '`', '%60', $css );

		self::buildPatterns();

		// Tokenize single line rules with /* @noflip */
		$noFlipSingle = new CSSJanus_Tokenizer( self::$patterns['noflip_single'], '`NOFLIP_SINGLE`' );
		$css = $noFlipSingle->tokenize( $css );

		// Tokenize class rules with /* @noflip */
		$noFlipClass = new CSSJanus_Tokenizer( self::$patterns['noflip_class'], '`NOFLIP_CLASS`' );
		$css = $noFlipClass->tokenize( $css );

		// Tokenize comments
		$comments = new CSSJanus_Tokenizer( self::$patterns['comment'], '`C`' );
		$css = $comments->tokenize( $css );

		// LTR->RTL fixes start here
		$css = self::fixDirection( $css );
		if ( $swapLtrRtlInURL ) {
			$css = self::fixLtrRtlInURL( $css );
		}

		if ( $swapLeftRightInURL ) {
			$css = self::fixLeftRightInURL( $css );
		}
		$css = self::fixLeftAndRight( $css );
		$css = self::fixCursorProperties( $css );
		$css = self::fixFourPartNotation( $css );
		$css = self::fixBackgroundPosition( $css );

		// Detokenize stuff we tokenized before, in reverse order of tokenization
		$css = $comments->detokenize( $css );
		$css = $noFlipClass->detokenize( $css );
		$css = $noFlipSingle->detokenize( $css );

		return $css;
	}

	/**
	 * Replace direction: ltr; with direction: rtl; and vice versa.
	 *
	 * The original implementation only does this inside body selectors
	 * and misses "body\n{\ndirection:ltr;\n}". This function does not have
	 * these problems.
	 *
	 * See http://code.google.com/p/cssjanus/issues/detail?id=15
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixDirection( $css ) {
		// Park 'ltr' behind a temporary token so the rtl->ltr pass below
		// does not immediately undo the swap.
		$css = preg_replace( self::$patterns['direction_ltr'],
			'$1' . self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['direction_rtl'], '$1ltr', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );

		return $css;
	}

	/**
	 * Replace 'ltr' with 'rtl' and vice versa in background URLs
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixLtrRtlInURL( $css ) {
		$css = preg_replace( self::$patterns['ltr_in_url'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['rtl_in_url'], 'ltr', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'rtl', $css );

		return $css;
	}

	/**
	 * Replace 'left' with 'right' and vice versa in background URLs
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixLeftRightInURL( $css ) {
		$css = preg_replace( self::$patterns['left_in_url'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['right_in_url'], 'left', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );

		return $css;
	}

	/**
	 * Flip rules like left: , padding-right: , etc.
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixLeftAndRight( $css ) {
		$css = preg_replace( self::$patterns['left'], self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['right'], 'left', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'right', $css );

		return $css;
	}

	/**
	 * Flip East and West in rules like cursor: nw-resize;
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixCursorProperties( $css ) {
		$css = preg_replace( self::$patterns['cursor_east'],
			'$1' . self::$patterns['tmpToken'], $css );
		$css = preg_replace( self::$patterns['cursor_west'], '$1e-resize', $css );
		$css = str_replace( self::$patterns['tmpToken'], 'w-resize', $css );

		return $css;
	}

	/**
	 * Swap the second and fourth parts in four-part notation rules like
	 * padding: 1px 2px 3px 4px;
	 *
	 * Unlike the original implementation, this function doesn't suffer from
	 * the bug where whitespace is not preserved when flipping four-part rules
	 * and four-part color rules with multiple whitespace characters between
	 * colors are not recognized.
	 * See http://code.google.com/p/cssjanus/issues/detail?id=16
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixFourPartNotation( $css ) {
		// Odd-numbered groups are values, even-numbered groups the whitespace
		// between them; the color pattern has an extra leading group ($1).
		$css = preg_replace( self::$patterns['four_notation_quantity'], '$1$2$7$4$5$6$3', $css );
		$css = preg_replace( self::$patterns['four_notation_color'], '$1$2$3$8$5$6$7$4', $css );

		return $css;
	}

	/**
	 * Flip horizontal background percentages.
	 *
	 * @param $css String: stylesheet to process
	 * @return String
	 */
	private static function fixBackgroundPosition( $css ) {
		// __CLASS__ is used instead of the string 'self', which is deprecated
		// inside callables as of PHP 8.2.
		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage'],
			array( __CLASS__, 'calculateNewBackgroundPosition' ), $css );
		$css = preg_replace_callback( self::$patterns['bg_horizontal_percentage_x'],
			array( __CLASS__, 'calculateNewBackgroundPosition' ), $css );

		return $css;
	}

	/**
	 * Callback for fixBackgroundPosition()
	 *
	 * @param $matches Array: regex match; [1] is the text before the number,
	 *  [2] the horizontal percentage value, [3] the text after it
	 * @return String: the match with the percentage replaced by 100 minus its value
	 */
	private static function calculateNewBackgroundPosition( $matches ) {
		return $matches[1] . ( 100 - $matches[2] ) . $matches[3];
	}
}
267
268 /**
269  * Utility class used by CSSJanus that tokenizes and untokenizes things we want
270  * to protect from being janused.
271  * @author Roan Kattouw
272  */
class CSSJanus_Tokenizer {
	/** Regular expression whose matches get swapped for the token */
	private $regex;

	/** Placeholder string inserted in place of every match */
	private $token;

	/** Matched strings, in the order they were replaced by tokenize() */
	private $originals = array();

	/**
	 * Constructor
	 * @param $regex string Regular expression whose matches to replace by a token.
	 * @param $token string Token
	 */
	public function __construct( $regex, $token ) {
		$this->token = $token;
		$this->regex = $regex;
	}

	/**
	 * Swap every match of the configured regex in $str for the token,
	 * recording each matched string so detokenize() can restore it later.
	 * @param $str String to tokenize
	 * @return string Tokenized string
	 */
	public function tokenize( $str ) {
		$recorder = array( $this, 'tokenizeCallback' );

		return preg_replace_callback( $this->regex, $recorder, $str );
	}

	/**
	 * preg_replace_callback() handler for tokenize(): stores the full match
	 * and hands back the token as its replacement.
	 */
	private function tokenizeCallback( $matches ) {
		$this->originals[] = $matches[0];

		return $this->token;
	}

	/**
	 * Put the remembered originals back where the tokens are. Originals are
	 * consumed sequentially across calls, so when several strings were
	 * tokenized they have to be detokenized in exactly the SAME ORDER.
	 * @param $str String: previously run through tokenize()
	 * @return string Original string
	 */
	public function detokenize( $str ) {
		// A regex is not really needed to find a fixed token, but
		// preg_replace_callback() is the available way to substitute a
		// different value for each occurrence of the same string.
		$tokenPattern = '/' . preg_quote( $this->token, '/' ) . '/';
		$restorer = array( $this, 'detokenizeCallback' );

		return preg_replace_callback( $tokenPattern, $restorer, $str );
	}

	/**
	 * preg_replace_callback() handler for detokenize(): returns the original
	 * at the array's internal pointer, then advances the pointer.
	 */
	private function detokenizeCallback( $matches ) {
		$original = current( $this->originals );
		next( $this->originals );

		return $original;
	}
}
323 }