]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - vendor/wikimedia/remex-html/RemexHtml/TreeBuilder/Initial.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / vendor / wikimedia / remex-html / RemexHtml / TreeBuilder / Initial.php
1 <?php
2
3 namespace RemexHtml\TreeBuilder;
4 use RemexHtml\HTMLData;
5 use RemexHtml\Tokenizer\Attributes;
6
/**
 * The "initial" insertion mode.
 *
 * Handles every token seen before a doctype. A doctype token is validated,
 * classified into a quirks mode and passed to the tree builder. Any other
 * significant token is treated as a missing doctype and is re-dispatched to
 * the "before html" mode.
 */
class Initial extends InsertionMode {
	/**
	 * The doctypes listed in the spec which are allowed without generating a
	 * parse error. A 2-d array where each row gives the doctype name, the
	 * public identifier and the system identifier.
	 */
	private static $allowedDoctypes = [
		[ 'html', '-//W3C//DTD HTML 4.0//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.0//EN', 'http://www.w3.org/TR/REC-html40/strict.dtd' ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ]
	];

	/**
	 * Public identifiers which select quirks mode when they match in full.
	 * They are compared ASCII case-insensitively.
	 */
	private static $quirkyPublicIds = [
		'-//W3O//DTD W3 HTML Strict 3.0//EN//',
		'-/W3C/DTD HTML 4.0 Transitional/EN',
		'HTML',
	];

	/**
	 * System identifier which selects quirks mode when it matches in full.
	 * It is compared ASCII case-insensitively.
	 */
	private static $quirkySystemId =
		'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

	/**
	 * Handle character data. Leading whitespace is dropped; anything left
	 * over is treated as a missing doctype and forwarded to the "before
	 * html" mode.
	 *
	 * @param string $text The buffer holding the characters
	 * @param int $start Offset of the run within $text
	 * @param int $length Length of the run within $text
	 * @param int $sourceStart Position reported with any parse error
	 * @param int $sourceLength Forwarded unchanged to the next handler
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		// Ignore whitespace: only the second part returned by the split
		// (what follows the leading whitespace) is of interest here.
		list( , $rest ) = $this->splitInitialMatch(
			true, "\t\n\f\r ", $text, $start, $length, $sourceStart, $sourceLength );
		list( $start, $length, $sourceStart, $sourceLength ) = $rest;
		if ( !$length ) {
			return;
		}
		$this->switchToBeforeHtml( $sourceStart )
			->characters( $text, $start, $length, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a start tag: the doctype is missing, so reprocess the tag in
	 * the "before html" mode.
	 *
	 * @param string $name
	 * @param Attributes $attrs
	 * @param bool $selfClose
	 * @param int $sourceStart Position reported with the parse error
	 * @param int $sourceLength
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->switchToBeforeHtml( $sourceStart )
			->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * Handle an end tag: the doctype is missing, so reprocess the tag in
	 * the "before html" mode.
	 *
	 * @param string $name
	 * @param int $sourceStart Position reported with the parse error
	 * @param int $sourceLength
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->switchToBeforeHtml( $sourceStart )
			->endTag( $name, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a doctype token: report an error for doctypes which are not
	 * allowed, work out the quirks mode, pass the doctype to the tree
	 * builder and switch to the "before html" mode.
	 *
	 * @param string|null $name The doctype name, or null if absent
	 * @param string|null $public The public identifier, or null if absent
	 * @param string|null $system The system identifier, or null if absent
	 * @param bool $quirks Truthy to start from quirks mode regardless of
	 *   the identifiers
	 * @param int $sourceStart Position reported with any parse error
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		// This validity check is deliberately exact (case-sensitive), unlike
		// the quirks detection below.
		$isPlainDoctype = $name === 'html' && $public === null
			&& ( $system === null || $system === 'about:legacy-compat' );
		if ( !$isPlainDoctype
			&& !in_array( [ $name, $public, $system ], self::$allowedDoctypes, true )
		) {
			$this->error( 'invalid doctype', $sourceStart );
		}

		// A missing identifier is treated as the empty string from here on.
		// This keeps null out of preg_match() and strcasecmp(), which is
		// deprecated as of PHP 8.1. Whether an identifier was missing is
		// still decided from the original $public / $system values.
		$publicId = (string)$public;
		$systemId = (string)$system;

		$quirks = $quirks ? TreeBuilder::QUIRKS : TreeBuilder::NO_QUIRKS;

		$quirksIfNoSystem = '~-//W3C//DTD HTML 4\.01 Frameset//|' .
			'-//W3C//DTD HTML 4\.01 Transitional//~Ai';
		$limitedQuirks = '~-//W3C//DTD XHTML 1\.0 Frameset//|' .
			'-//W3C//DTD XHTML 1\.0 Transitional//~Ai';

		// NOTE(review): the HTML spec appears to apply the full-quirks
		// conditions only when the document is not an iframe srcdoc document,
		// whereas this branch is not gated on isIframeSrcdoc. Confirm whether
		// that is intentional before changing it.
		if ( $name !== 'html'
			|| self::equalsAnyIgnoringCase( $publicId, self::$quirkyPublicIds )
			|| strcasecmp( $systemId, self::$quirkySystemId ) === 0
			|| ( $system === null && preg_match( $quirksIfNoSystem, $publicId ) )
			|| preg_match( HTMLData::$quirkyPrefixRegex, $publicId )
		) {
			$quirks = TreeBuilder::QUIRKS;
		} elseif ( !$this->builder->isIframeSrcdoc
			&& (
				preg_match( $limitedQuirks, $publicId )
				|| ( $system !== null && preg_match( $quirksIfNoSystem, $publicId ) )
			)
		) {
			$quirks = TreeBuilder::LIMITED_QUIRKS;
		}

		// The tree builder is always given strings, never null.
		$name = $name === null ? '' : $name;
		$this->builder->doctype( $name, $publicId, $systemId, $quirks,
			$sourceStart, $sourceLength );
		$this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}

	/**
	 * Handle the end of the document: the doctype is missing, so finish
	 * processing in the "before html" mode.
	 *
	 * @param int $pos Position reported with the parse error
	 */
	public function endDocument( $pos ) {
		$this->switchToBeforeHtml( $pos )
			->endDocument( $pos );
	}

	/**
	 * Shared handling for any token which shows the doctype is missing.
	 * Unless the document is an iframe srcdoc document, raise a "missing
	 * doctype" error and put the builder into quirks mode. Then switch the
	 * dispatcher to the "before html" mode.
	 *
	 * @param int $pos Position reported with the parse error
	 * @return mixed Whatever Dispatcher::switchMode() returns; callers
	 *   invoke the token handler method on it to reprocess the token
	 */
	private function switchToBeforeHtml( $pos ) {
		if ( !$this->builder->isIframeSrcdoc ) {
			$this->error( 'missing doctype', $pos );
			$this->builder->quirks = TreeBuilder::QUIRKS;
		}
		return $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}

	/**
	 * Determine whether a string equals any of the candidates, ignoring
	 * ASCII case.
	 *
	 * @param string $value
	 * @param string[] $candidates
	 * @return bool
	 */
	private static function equalsAnyIgnoringCase( $value, array $candidates ) {
		foreach ( $candidates as $candidate ) {
			if ( strcasecmp( $value, $candidate ) === 0 ) {
				return true;
			}
		}
		return false;
	}
}