]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - tests/phpunit/includes/tidy/BalancerTest.php
MediaWiki 1.30.2-scripts
[autoinstalls/mediawiki.git] / tests / phpunit / includes / tidy / BalancerTest.php
1 <?php
2
/**
 * Runs MediaWiki\Tidy\Balancer against the html5lib tree-construction test
 * cases stored in html5lib-tests.json (next to this file), plus a few
 * MediaWiki-specific cases.
 */
class BalancerTest extends MediaWikiTestCase {

	/**
	 * Anything that needs to happen before the tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setup and teardown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
	}

	/**
	 * Balance $input and check the serialized result against $expected.
	 *
	 * @covers MediaWiki\Tidy\Balancer
	 * @covers MediaWiki\Tidy\BalanceSets
	 * @covers MediaWiki\Tidy\BalanceElement
	 * @covers MediaWiki\Tidy\BalanceStack
	 * @covers MediaWiki\Tidy\BalanceMarker
	 * @covers MediaWiki\Tidy\BalanceActiveFormattingElements
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Label reported when the assertion fails
	 * @param string $input HTML handed to the balancer
	 * @param string $expected Expected balanced HTML
	 * @param bool $useTidy Value passed as the balancer's 'tidyCompat' option
	 */
	public function testBalancer( $description, $input, $expected, $useTidy ) {
		$balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => $useTidy, /* standard parser */
			'allowComments' => true, /* comment parsing */
		] );
		$output = $balancer->balance( $input );

		// Ignore self-closing tags: rewrite "/>" (together with any
		// whitespace in front of it) to a plain ">".
		$output = preg_replace( '/\s*\/>/', '>', $output );

		// Both values are strings; assertSame avoids the loose comparison
		// assertEquals may apply to numeric-looking strings.
		$this->assertSame( $expected, $output, $description );
	}

	/**
	 * Data provider for testBalancer().
	 *
	 * Converts the html5lib test cases into the row format PHPUnit expects
	 * from providers, filtering out HTML constructs which the balancer
	 * doesn't support, and appends some MediaWiki-specific cases.
	 *
	 * @return array[] Rows of [ description, input, expected, useTidy ]
	 * @throws RuntimeException If html5lib-tests.json cannot be read or decoded
	 */
	public static function provideBalancerTests() {
		$tests = [];
		// Only accept documents with an empty <head>, an optional
		// "<!DOCTYPE html>" and everything else inside <body>.
		$okre = "~ \A
			(?i:<!DOCTYPE\ html>)?
			<html><head></head><body>
			.*
			</body></html>
		\z ~xs";
		foreach ( self::loadHtml5libTests() as $filename => $cases ) {
			foreach ( $cases as $case ) {
				if ( !preg_match( $okre, $case['document']['html'] ) ) {
					// Skip tests which involve stuff in the <head> or
					// weird doctypes.
					continue;
				}
				// The expected output is not cut out of the 'html' field;
				// instead we use a different field in the test case,
				// which reports how domino would parse this case in a
				// no-quirks <body> context.  (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Normalize case of the SVG 'foreignObject' name.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );
				// Normalize case of the MathML 'definitionURL' name.
				$html = str_replace( 'definitionURL', 'definitionurl', $html );

				if ( self::isUnsupportedCase( $filename, $case, $html ) ) {
					continue;
				}

				// The doctype is dropped from the balancer's input.
				$data = preg_replace(
					'~<!DOCTYPE html>~i', '', $case['data']
				);
				$tests[] = [
					$filename, # use better description?
					$data,
					$html,
					false # strict HTML5 compat mode, no tidy
				];
			}
		}

		# Some additional tests for mediawiki-specific features
		$tests[] = [
			'Round-trip serialization for <pre>/<listing>/<textarea>',
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			true # use the tidy-compatible mode
		];

		return $tests;
	}

	/**
	 * Read and decode html5lib-tests.json from this file's directory.
	 *
	 * @return array Decoded JSON; iterated by the provider as
	 *   file name => list of test cases
	 * @throws RuntimeException If the file cannot be read or is not valid JSON
	 */
	private static function loadHtml5libTests() {
		$path = __DIR__ . '/html5lib-tests.json';
		$contents = file_get_contents( $path );
		if ( $contents === false ) {
			throw new RuntimeException( "Unable to read test fixture $path" );
		}
		$json = json_decode( $contents, true );
		if ( !is_array( $json ) ) {
			// Fail loudly rather than silently providing almost no tests.
			throw new RuntimeException(
				"Unable to decode test fixture $path: " . json_last_error_msg()
			);
		}
		return $json;
	}

	/**
	 * Decide whether an html5lib test case has to be skipped.
	 *
	 * @param string $filename Name of the html5lib file the case came from
	 * @param array $case Decoded test case; reads $case['data'] and
	 *   $case['document']['props']
	 * @param string $html Expected output for the case, after normalization
	 * @return bool True if the case should not be handed to testBalancer()
	 */
	private static function isUnsupportedCase( $filename, $case, $html ) {
		$props = isset( $case['document']['props'] )
			? $case['document']['props']
			: [];

		if ( isset( $props['comment'] ) && preg_match( ',<!--[^>]*<,', $html ) ) {
			// Skip tests which include HTML comments containing
			// the < character, which we don't support.
			return true;
		}

		$data = $case['data'];

		if ( strpos( $data, '<![CDATA[' ) !== false ) {
			// Skip tests involving <![CDATA[ ]]> quoting.
			return true;
		}
		if (
			stripos( $data, '<!DOCTYPE' ) !== false &&
			stripos( $data, '<!DOCTYPE html>' ) === false
		) {
			// Skip tests involving unusual doctypes.
			return true;
		}

		$literalre = "~ <rdar: | < /? (
			html | head | body | frame | frameset | plaintext
		) > ~xi";
		if ( preg_match( $literalre, $data ) ) {
			// Skip tests involving some literal tags, which are
			// unsupported but don't show up in the expected output.
			return true;
		}

		// Unsupported tags which *do* show up in the expected output.
		$unsupportedTags = [
			'iframe', 'noembed', 'noscript', 'script',
			'svg script', 'svg title', 'title', 'xmp',
		];
		$tags = isset( $props['tags'] ) ? $props['tags'] : [];
		foreach ( $unsupportedTags as $tag ) {
			if ( isset( $tags[$tag] ) ) {
				return true;
			}
		}

		if (
			$filename === 'entities01.dat' ||
			$filename === 'entities02.dat' ||
			preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $data ) ||
			preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $data )
		) {
			// Skip tests involving entity encoding.
			return true;
		}
		if (
			isset( $props['tagWithLt'] ) ||
			isset( $props['attrWithFunnyChar'] ) ||
			preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $data ) ||
			preg_match( ':</p<p>:', $data ) ||
			preg_match( ':<b &=&amp>|<p/x/y/z>:', $data )
		) {
			// Skip tests with funny tag or attribute names,
			// which are really tests of the HTML tokenizer, not
			// the tree builder.
			return true;
		}
		if ( preg_match( ':encoding=" text/html "|type=" hidden":', $data ) ) {
			// The Sanitizer normalizes whitespace in attribute
			// values, which makes this test case invalid.
			return true;
		}

		// plain-text-unsafe.dat holds tests with ASCII null, etc.
		return $filename === 'plain-text-unsafe.dat';
	}
}