]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - tests/phpunit/includes/SanitizerTest.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / tests / phpunit / includes / SanitizerTest.php
<?php

/**
 * PHPUnit tests for the static helper methods of the Sanitizer class.
 *
 * @todo Tests covering decodeCharReferences can be refactored into a single
 * method and dataprovider.
 *
 * @group Sanitizer
 */
class SanitizerTest extends MediaWikiTestCase {

	/**
	 * Destroy the MWTidy singleton after every test; several tests below
	 * replace it through MWTidy::setInstance( false ).
	 */
	protected function tearDown() {
		MWTidy::destroySingleton();
		parent::tearDown();
	}

	/**
	 * A named entity is decoded to its UTF-8 byte sequence.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric entities are both decoded.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named entities can be mixed in one string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding happens once only: an escaped ampersand (&amp; or &#38;)
	 * followed by entity-like text yields the literal entity text.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start an entity is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named entity is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * An out-of-range numeric entity becomes the UTF-8 replacement character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UtfNormal\Constants::UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 element (ie: 'video')
	 * @param bool $escaped Whether sanitizer let the tag in or escape it (ie: '&lt;video&gt;')
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		MWTidy::setInstance( false );

		if ( $escaped ) {
			$this->assertEquals( "&lt;$tag&gt;",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		} else {
			// An accepted tag comes back closed and followed by a newline.
			$this->assertEquals( "<$tag></$tag>\n",
				Sanitizer::removeHTMLtags( "<$tag>" )
			);
		}
	}

	/**
	 * Provide HTML5 tags
	 *
	 * @return array[] Rows of [ <tag name>, <whether the tag is expected to be escaped> ]
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return [
			[ 'data', $VERBATIM ],
			[ 'mark', $VERBATIM ],
			[ 'time', $VERBATIM ],
			[ 'video', $ESCAPED ],
		];
	}

	/**
	 * Fixtures for testRemoveHTMLtags.
	 *
	 * Declared public static like the other data providers of this class;
	 * it does not use any instance state.
	 *
	 * @return array[] Rows of [ <input HTML>, <expected output>, <message> ]
	 */
	public static function dataRemoveHTMLtags() {
		return [
			// former testSelfClosingTag
			[
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			],
			// Make sure special nested HTML5 semantics are not broken
			// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
			[
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
			[
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
			[
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			],
		];
	}

	/**
	 * @dataProvider dataRemoveHTMLtags
	 * @covers Sanitizer::removeHTMLtags
	 *
	 * @param string $input HTML handed to the sanitizer
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		MWTidy::setInstance( false );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * @dataProvider provideTagAttributesToDecode
	 * @covers Sanitizer::decodeTagAttributes
	 *
	 * @param array $expected Expected map of attribute name => value
	 * @param string $attributes Raw attribute string
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Fixtures for testDecodeTagAttributes.
	 *
	 * @return array[] Rows of [ <expected attribute map>, <raw attribute string>, <message> ]
	 */
	public static function provideTagAttributesToDecode() {
		return [
			[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
			[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
			[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
			[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
			[ [], 'ńgh=bar', 'Combining accent is not allowed' ],
			[ [ 'foo' => 'bar' ], '    foo   =   bar    ', 'Spaced attribute' ],
			[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
			[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
			// NOTE(review): the next three rows look identical in this copy of the
			// file; confirm whether the originals differ in the whitespace between
			// the attributes before deduplicating them.
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\'   baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\'   baz="foo"',
				'Several attributes'
			],
			[
				[ 'foo' => 'bar', 'baz' => 'foo' ],
				'foo=\'bar\'   baz="foo"',
				'Several attributes'
			],
			[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
			[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
			[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
			[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

			# Invalid beginning
			[ [], '-foo=bar', 'Leading - is forbidden' ],
			[ [], '.foo=bar', 'Leading . is forbidden' ],

			# The same characters are fine after the first position
			[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
			[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
			[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
			[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see T29539)
			[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
			[ [], 'foo$=baz', 'Symbols are not allowed' ],
			[ [], 'foo@=baz', 'Symbols are not allowed' ],
			[ [], 'foo~=baz', 'Symbols are not allowed' ],
			[
				[ 'foo' => '1[#^`*%w/(' ],
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			],
			// The value holds a single quote and is wrapped in double quotes.
			[
				[ 'foo' => '1[#^`*%\'w/(' ],
				'foo="1[#^`*%\'w/("',
				'Single quotes are allowed if quoted by double quotes'
			],
			// The value holds a double quote and is wrapped in single quotes.
			[
				[ 'foo' => '1[#^`*%"w/(' ],
				'foo=\'1[#^`*%"w/(\'',
				'Double quotes are allowed if quoted by single quotes'
			],
			[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
			[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
		];
	}

	/**
	 * Deprecated presentational attributes must come back unchanged, with
	 * only a single leading space prepended.
	 *
	 * @dataProvider provideDeprecatedAttributes
	 * @covers Sanitizer::fixTagAttributes
	 *
	 * @param string $inputAttr Attribute string, e.g. 'clear="left"'
	 * @param string $inputEl Element name the attribute belongs to
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Fixtures for testDeprecatedAttributesUnaltered.
	 *
	 * @return array[] Rows of [ <attribute>, <element>, [message] ]
	 */
	public static function provideDeprecatedAttributes() {
		/** [ <attribute>, <element>, [message] ] */
		return [
			[ 'clear="left"', 'br' ],
			[ 'clear="all"', 'br' ],
			[ 'width="100"', 'td' ],
			[ 'nowrap="true"', 'td' ],
			[ 'nowrap=""', 'td' ],
			[ 'align="right"', 'td' ],
			[ 'align="center"', 'table' ],
			[ 'align="left"', 'tr' ],
			[ 'align="center"', 'div' ],
			[ 'align="left"', 'h1' ],
			[ 'align="left"', 'p' ],
		];
	}

	/**
	 * @dataProvider provideCssCommentsFixtures
	 * @covers Sanitizer::checkCss
	 *
	 * @param string $expected Expected result of the CSS check
	 * @param string $css CSS handed to Sanitizer::checkCss
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Fixtures for testCssCommentsChecking.
	 *
	 * @return array[] Rows of [ <expected>, <css>, [message] ]
	 */
	public static function provideCssCommentsFixtures() {
		/** [ <expected>, <css>, [message] ] */
		return [
			// Valid comments spanning entire input
			[ '/**/', '/**/' ],
			[ '/* comment */', '/* comment */' ],
			// Weird stuff
			[ ' ', '/****/' ],
			[ ' ', '/* /* */' ],
			[ 'display: block;', "display:/* foo */block;" ],
			[ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (T30450)' ],
			[ '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ],
			[ '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ],
			// Constructs that are replaced wholesale by an "insecure input" comment
			[
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			],
			[
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			],
			[ '/* insecure input */', 'width: expression(1+1);' ],
			[ '/* insecure input */', 'background-image: image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
			[ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
			[
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			],
			[ '/* insecure input */', 'foo: attr( title, url );' ],
			[ '/* insecure input */', 'foo: attr( title url );' ],
			[ '/* insecure input */', 'foo: var(--evil-attribute)' ],
		];
	}

	/**
	 * @dataProvider provideEscapeHtmlAllowEntities
	 * @covers Sanitizer::escapeHtmlAllowEntities
	 *
	 * @param string $expected Expected escaped output
	 * @param string $html Input text
	 */
	public function testEscapeHtmlAllowEntities( $expected, $html ) {
		$this->assertEquals(
			$expected,
			Sanitizer::escapeHtmlAllowEntities( $html )
		);
	}

	/**
	 * Fixtures for testEscapeHtmlAllowEntities.
	 *
	 * @return array[] Rows of [ <expected>, <input> ]
	 */
	public static function provideEscapeHtmlAllowEntities() {
		return [
			[ 'foo', 'foo' ],
			[ 'a¡b', 'a&#161;b' ],
			[ 'foo&#039;bar', "foo'bar" ],
			[ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
		];
	}

	/**
	 * Test Sanitizer::escapeId
	 *
	 * @dataProvider provideEscapeId
	 * @covers Sanitizer::escapeId
	 *
	 * @param string $input Raw id
	 * @param string $output Expected id with the 'noninitial' and 'legacy' options
	 */
	public function testEscapeId( $input, $output ) {
		$this->assertEquals(
			$output,
			Sanitizer::escapeId( $input, [ 'noninitial', 'legacy' ] )
		);
	}

	/**
	 * Fixtures for testEscapeId.
	 *
	 * @return array[] Rows of [ <input>, <expected output> ]
	 */
	public static function provideEscapeId() {
		return [
			[ '+', '.2B' ],
			[ '&', '.26' ],
			[ '=', '.3D' ],
			[ ':', ':' ],
			[ ';', '.3B' ],
			[ '@', '.40' ],
			[ '$', '.24' ],
			[ '-_.', '-_.' ],
			[ '!', '.21' ],
			[ '*', '.2A' ],
			[ '/', '.2F' ],
			[ '[]', '.5B.5D' ],
			[ '<>', '.3C.3E' ],
			[ '\'', '.27' ],
			[ '§', '.C2.A7' ],
			[ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
			[ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
		];
	}

	/**
	 * Test escapeIdReferenceList for consistency with escapeIdForAttribute
	 *
	 * @dataProvider provideEscapeIdReferenceList
	 * @covers Sanitizer::escapeIdReferenceList
	 *
	 * @param string $referenceList Space-separated list of two ids
	 * @param string $id1 First id of the list
	 * @param string $id2 Second id of the list
	 */
	public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
		// The reference value is what escaping each id on its own produces.
		$expected = Sanitizer::escapeIdForAttribute( $id1 )
			. ' '
			. Sanitizer::escapeIdForAttribute( $id2 );

		// Expected value first, value under test second, so that a failure
		// is reported the right way round.
		// NOTE(review): the 'noninitial' argument is kept from the original
		// call; confirm that escapeIdReferenceList still takes options.
		$this->assertEquals(
			$expected,
			Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' )
		);
	}

	/**
	 * Fixtures for testEscapeIdReferenceList.
	 *
	 * @return array[] Rows of [ <reference list>, <individual id 1>, <individual id 2> ]
	 */
	public static function provideEscapeIdReferenceList() {
		/** [ <reference list>, <individual id 1>, <individual id 2> ] */
		return [
			[ 'foo bar', 'foo', 'bar' ],
			[ '#1 #2', '#1', '#2' ],
			[ '+1 +2', '+1', '+2' ],
		];
	}

	/**
	 * @dataProvider provideIsReservedDataAttribute
	 * @covers Sanitizer::isReservedDataAttribute
	 *
	 * @param string $attr Attribute name
	 * @param bool $expected Whether the attribute is expected to be reserved
	 */
	public function testIsReservedDataAttribute( $attr, $expected ) {
		$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
	}

	/**
	 * Fixtures for testIsReservedDataAttribute.
	 *
	 * @return array[] Rows of [ <attribute name>, <expected result> ]
	 */
	public static function provideIsReservedDataAttribute() {
		return [
			[ 'foo', false ],
			[ 'data', false ],
			[ 'data-foo', false ],
			[ 'data-mw', true ],
			[ 'data-ooui', true ],
			[ 'data-parsoid', true ],
			[ 'data-mw-foo', true ],
			[ 'data-ooui-foo', true ],
			[ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
		];
	}

	/**
	 * Call Sanitizer::escapeIdFor<stuff>() under a given fragment mode
	 * configuration and compare the result with the expected id.
	 *
	 * @dataProvider provideEscapeIdForStuff
	 *
	 * @covers Sanitizer::escapeIdForAttribute()
	 * @covers Sanitizer::escapeIdForLink()
	 * @covers Sanitizer::escapeIdForExternalInterwiki()
	 * @covers Sanitizer::escapeIdInternal()
	 *
	 * @param string $stuff
	 * @param string[] $config
	 * @param string $id
	 * @param string|false $expected
	 * @param int|null $mode
	 */
	public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
		$func = "Sanitizer::escapeIdFor{$stuff}";
		// The last element is $wgExternalInterwikiFragmentMode, what is left
		// after popping it is $wgFragmentMode.
		$iwFlavor = array_pop( $config );
		$this->setMwGlobals( [
			'wgFragmentMode' => $config,
			'wgExternalInterwikiFragmentMode' => $iwFlavor,
		] );
		$escaped = call_user_func( $func, $id, $mode );
		// Strict comparison: an expected false must not be satisfied by an
		// empty string or any other falsy value.
		$this->assertSame( $expected, $escaped );
	}

	/**
	 * Fixtures for testEscapeIdForStuff.
	 *
	 * Declared public static like the other data providers of this class;
	 * it does not use any instance state.
	 *
	 * @return array[] Rows of [ <method suffix>, <config>, <id>, <expected>, [mode] ]
	 */
	public static function provideEscapeIdForStuff() {
		// Test inputs and outputs
		$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
		$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
			'.26.26amp.3B.26amp.3Bamp.3B';
		$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';
		$html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;';

		// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
		$legacy = [ 'legacy', 'legacy' ];
		$legacyNew = [ 'legacy', 'html5', 'legacy' ];
		$newLegacy = [ 'html5', 'legacy', 'legacy' ];
		$new = [ 'html5', 'legacy' ];
		$allNew = [ 'html5', 'html5' ];
		$experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
		$newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];

		return [
			// Pure legacy: how MW worked before 2017
			[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacy, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

			// Transition to a new world: legacy links with HTML5 fallback
			[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacyNew, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

			// New world: HTML5 links, legacy fallbacks
			[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $newLegacy, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

			// Distant future: no legacy fallbacks, but still linking to legacy wikis
			[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $new, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

			// Just before the heat death of universe: external interwikis are also HTML5 \m/
			[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $allNew, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],

			// Someone flipped $wgExperimentalHtmlIds on
			[ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $experimentalLegacy, $text, $html5Experimental ],
			[ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],

			// Migration from $wgExperimentalHtmlIds to modern HTML5
			[ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
			[ 'Link', $newExperimental, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
		];
	}

	/**
	 * An unknown fragment mode name must be rejected.
	 *
	 * @expectedException InvalidArgumentException
	 * @covers Sanitizer::escapeIdInternal()
	 */
	public function testInvalidFragmentThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * $wgFragmentMode configured with a single entry under key 666 (no
	 * entry at index 0) must be rejected when escaping an attribute id.
	 *
	 * @expectedException UnexpectedValueException
	 * @covers Sanitizer::escapeIdForAttribute()
	 */
	public function testNoPrimaryFragmentModeThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * Same configuration as testNoPrimaryFragmentModeThrows, exercised
	 * through escapeIdForLink.
	 *
	 * @expectedException UnexpectedValueException
	 * @covers Sanitizer::escapeIdForLink()
	 */
	public function testNoPrimaryFragmentModeThrows2() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
		Sanitizer::escapeIdForLink( 'This should throw' );
	}
}