3 namespace RemexHtml\TreeBuilder;
4 use RemexHtml\HTMLData;
5 use RemexHtml\Tokenizer\Attributes;
/**
 * The "initial" insertion mode.
 *
 * This mode waits for a DOCTYPE token. Any other significant token means the
 * document has no doctype: unless the document is an iframe srcdoc document,
 * a "missing doctype" error is raised and the builder is put into quirks
 * mode. Either way the dispatcher is switched to the "before html" mode and
 * the token is re-dispatched there.
 */
class Initial extends InsertionMode {
	/**
	 * The doctypes listed in the spec which are allowed without generating a
	 * parse error. A 2-d array where each row gives the doctype name, the
	 * public identifier and the system identifier.
	 */
	private static $allowedDoctypes = [
		[ 'html', '-//W3C//DTD HTML 4.0//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.0//EN', 'http://www.w3.org/TR/REC-html40/strict.dtd' ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', null ],
		[ 'html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ],
		[ 'html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ],
	];

	/**
	 * Handle a run of character data.
	 *
	 * Leading whitespace is ignored in this mode. If anything is left after
	 * it, the document has no doctype and the remainder is forwarded to the
	 * "before html" mode.
	 *
	 * @param string $text The buffer containing the characters
	 * @param int $start Offset of the run within $text
	 * @param int $length Length of the run within $text
	 * @param int $sourceStart Offset of the run in the source input
	 * @param int $sourceLength Length of the run in the source input
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		// Only the second part returned by the splitter is used here; it is
		// taken to be the text following the leading whitespace run.
		list( , $rest ) = $this->splitInitialMatch(
			true, "\t\n\f\r ", $text, $start, $length, $sourceStart, $sourceLength );
		list( $start, $length, $sourceStart, $sourceLength ) = $rest;

		// A whitespace-only token is ignored entirely: it must neither raise
		// "missing doctype" nor leave the initial mode.
		if ( !$length ) {
			return;
		}

		$this->leaveWithoutDoctype( $sourceStart )
			->characters( $text, $start, $length, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a start tag: the doctype is missing, so reprocess the tag in
	 * the "before html" mode.
	 *
	 * @param string $name The tag name
	 * @param Attributes $attrs The tag attributes
	 * @param bool $selfClose Whether the tag was self-closing
	 * @param int $sourceStart Offset of the tag in the source input
	 * @param int $sourceLength Length of the tag in the source input
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->leaveWithoutDoctype( $sourceStart )
			->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * Handle an end tag: the doctype is missing, so reprocess the tag in
	 * the "before html" mode.
	 *
	 * @param string $name The tag name
	 * @param int $sourceStart Offset of the tag in the source input
	 * @param int $sourceLength Length of the tag in the source input
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->leaveWithoutDoctype( $sourceStart )
			->endTag( $name, $sourceStart, $sourceLength );
	}

	/**
	 * Handle a DOCTYPE token: report it if it is not an allowed doctype,
	 * work out the quirks mode it implies, pass it to the builder and move
	 * on to the "before html" mode.
	 *
	 * @param string|null $name The doctype name, or null if absent
	 * @param string|null $public The public identifier, or null if absent
	 * @param string|null $system The system identifier, or null if absent
	 * @param bool $quirks Whether the tokenizer requested quirks mode
	 * @param int $sourceStart Offset of the token in the source input
	 * @param int $sourceLength Length of the token in the source input
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$isPlainHtmlDoctype = $name === 'html'
			&& $public === null
			&& ( $system === null || $system === 'about:legacy-compat' );
		if ( !$isPlainHtmlDoctype
			&& !in_array( [ $name, $public, $system ], self::$allowedDoctypes, true )
		) {
			$this->error( 'invalid doctype', $sourceStart );
		}

		// Both patterns are anchored (A) and case-insensitive (i), so they
		// act as prefix tests on the public identifier.
		$quirksIfNoSystem = '~-//W3C//DTD HTML 4\.01 Frameset//|' .
			'-//W3C//DTD HTML 4\.01 Transitional//~Ai';
		$limitedQuirks = '~-//W3C//DTD XHTML 1\.0 Frameset//|' .
			'-//W3C//DTD XHTML 1\.0 Transitional//~Ai';

		// preg_match() must not be given null (deprecated since PHP 8.1); a
		// missing public identifier matches none of the prefixes either way.
		$publicText = $public === null ? '' : $public;

		// The tokenizer's quirks request is tested first so that a
		// limited-quirks public identifier cannot downgrade it.
		if ( $quirks
			|| $name !== 'html'
			|| $public === '-//W3O//DTD W3 HTML Strict 3.0//EN//'
			|| $public === '-/W3C/DTD HTML 4.0 Transitional/EN'
			|| $public === 'HTML'
			|| $system === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
			|| ( $system === null && preg_match( $quirksIfNoSystem, $publicText ) )
			|| preg_match( HTMLData::$quirkyPrefixRegex, $publicText )
		) {
			$mode = TreeBuilder::QUIRKS;
		} elseif ( !$this->builder->isIframeSrcdoc
			&& (
				preg_match( $limitedQuirks, $publicText )
				|| ( $system !== null && preg_match( $quirksIfNoSystem, $publicText ) )
			)
		) {
			$mode = TreeBuilder::LIMITED_QUIRKS;
		} else {
			$mode = TreeBuilder::NO_QUIRKS;
		}

		// The builder is given empty strings in place of missing parts.
		$this->builder->doctype(
			$name === null ? '' : $name,
			$public === null ? '' : $public,
			$system === null ? '' : $system,
			$mode, $sourceStart, $sourceLength );
		$this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}

	/**
	 * Handle the end of the input: the doctype is missing, so let the
	 * "before html" mode finish the document.
	 *
	 * @param int $pos Offset of the end of the source input
	 */
	public function endDocument( $pos ) {
		$this->leaveWithoutDoctype( $pos )
			->endDocument( $pos );
	}

	/**
	 * Leave the initial mode because a token other than a doctype was seen.
	 *
	 * Unless the document is an iframe srcdoc document, this raises a
	 * "missing doctype" error and puts the builder into quirks mode. The
	 * dispatcher is then switched to the "before html" mode.
	 *
	 * @param int $pos Source offset at which to report the error
	 * @return InsertionMode Whatever Dispatcher::switchMode() returns, on
	 *   which the caller re-dispatches the current token
	 */
	private function leaveWithoutDoctype( $pos ) {
		if ( !$this->builder->isIframeSrcdoc ) {
			$this->error( 'missing doctype', $pos );
			$this->builder->quirks = TreeBuilder::QUIRKS;
		}
		return $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
	}
}