scripts.mit.edu Git - autoinstalls/mediawiki.git/blobdiff - vendor/wikimedia/remex-html/RemexHtml/TreeBuilder/Initial.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / vendor / wikimedia / remex-html / RemexHtml / TreeBuilder / Initial.php
diff --git a/vendor/wikimedia/remex-html/RemexHtml/TreeBuilder/Initial.php b/vendor/wikimedia/remex-html/RemexHtml/TreeBuilder/Initial.php
new file mode 100644 (file)
index 0000000..f368b21
--- /dev/null
@@ -0,0 +1,110 @@
+<?php
+
+namespace RemexHtml\TreeBuilder;
+use RemexHtml\HTMLData;
+use RemexHtml\Tokenizer\Attributes;
+
+/**
+ * The "initial" insertion mode
+ */
+class Initial extends InsertionMode {
+       /**
+        * The doctypes listed in the spec which are allowed without generating a
+        * parse error. A 2-d array where each row gives the doctype name, the
+        * public identifier and the system identifier. A null entry stands for a
+        * missing identifier; rows are matched with a strict in_array(), so the
+        * comparison is exact and type-sensitive.
+        */
+       private static $allowedDoctypes = [
+               [ 'html', '-//W3C//DTD HTML 4.0//EN', null ],
+               [ 'html', '-//W3C//DTD HTML 4.0//EN', 'http://www.w3.org/TR/REC-html40/strict.dtd' ],
+               [ 'html', '-//W3C//DTD HTML 4.01//EN', null ],
+               [ 'html', '-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd' ],
+               [ 'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
+                       'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' ],
+               [ 'html', '-//W3C//DTD XHTML 1.1//EN', 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' ]
+       ];
+
+       /**
+        * Handle character data seen before any doctype.
+        *
+        * Leading whitespace is dropped. If anything remains, the doctype is
+        * missing: unless the document is an iframe srcdoc document, an error
+        * is reported and quirks mode is set. The remaining text is then handed
+        * to the "before html" mode.
+        *
+        * The parameters are forwarded unchanged (apart from the whitespace
+        * trimming) to the next mode's characters() handler.
+        *
+        * @param string $text
+        * @param int $start
+        * @param int $length
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
+               // Ignore whitespace. Only the second part returned by the helper
+               // (presumably the text after the leading whitespace run) is used.
+               list( $part1, $part2 ) = $this->splitInitialMatch(
+                       true, "\t\n\f\r ", $text, $start, $length, $sourceStart, $sourceLength );
+               list( $start, $length, $sourceStart, $sourceLength ) = $part2;
+               if ( !$length ) {
+                       // Nothing but whitespace: stay in the initial mode
+                       return;
+               }
+               if ( !$this->builder->isIframeSrcdoc ) {
+                       $this->error( 'missing doctype', $sourceStart );
+                       $this->builder->quirks = TreeBuilder::QUIRKS;
+               }
+               $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML )
+                       ->characters( $text, $start, $length, $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Handle a start tag seen before any doctype.
+        *
+        * Unless the document is an iframe srcdoc document, this reports a
+        * missing doctype and sets quirks mode. The tag is then reprocessed in
+        * the "before html" mode.
+        *
+        * @param string $name
+        * @param Attributes $attrs
+        * @param bool $selfClose
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
+               if ( !$this->builder->isIframeSrcdoc ) {
+                       $this->error( 'missing doctype', $sourceStart );
+                       $this->builder->quirks = TreeBuilder::QUIRKS;
+               }
+               $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML )
+                       ->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Handle an end tag seen before any doctype.
+        *
+        * Unless the document is an iframe srcdoc document, this reports a
+        * missing doctype and sets quirks mode. The tag is then reprocessed in
+        * the "before html" mode.
+        *
+        * @param string $name
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function endTag( $name, $sourceStart, $sourceLength ) {
+               if ( !$this->builder->isIframeSrcdoc ) {
+                       $this->error( 'missing doctype', $sourceStart );
+                       $this->builder->quirks = TreeBuilder::QUIRKS;
+               }
+               $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML )
+                       ->endTag( $name, $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Handle a doctype token.
+        *
+        * Reports an 'invalid doctype' error unless the doctype is the plain
+        * "html" doctype (optionally with the about:legacy-compat system
+        * identifier) or one of the rows of self::$allowedDoctypes. Then works
+        * out the quirks mode, passes the doctype to the tree builder with
+        * missing (null) parts replaced by empty strings, and switches to the
+        * "before html" mode.
+        *
+        * @param string|null $name The doctype name, or null if missing
+        * @param string|null $public The public identifier, or null if missing
+        * @param string|null $system The system identifier, or null if missing
+        * @param bool $quirks Truthy to start from quirks mode regardless of the
+        *   identifiers (presumably the tokenizer's force-quirks flag)
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
+               if ( ( $name !== 'html' || $public !== null
+                               || ( $system !== null && $system !== 'about:legacy-compat' )
+                       )
+                       && !in_array( [ $name, $public, $system ], self::$allowedDoctypes, true )
+               ) {
+                       $this->error( 'invalid doctype', $sourceStart );
+               }
+
+               $quirks = $quirks ? TreeBuilder::QUIRKS : TreeBuilder::NO_QUIRKS;
+
+               // String forms of the identifiers, with '' standing in for a missing
+               // one. These are used for all string matching below, so that null is
+               // never passed to preg_match() or strcasecmp() (deprecated since
+               // PHP 8.1). The "is it missing" tests still use the original values,
+               // since an empty system identifier does not count as missing.
+               $publicId = $public === null ? '' : $public;
+               $systemId = $system === null ? '' : $system;
+
+               $quirksIfNoSystem = '~-//W3C//DTD HTML 4\.01 Frameset//|' .
+                       '-//W3C//DTD HTML 4\.01 Transitional//~Ai';
+               $limitedQuirks = '~-//W3C//DTD XHTML 1\.0 Frameset//|' .
+                       '-//W3C//DTD XHTML 1\.0 Transitional//~Ai';
+
+               // The spec requires the identifiers to be compared ASCII
+               // case-insensitively, hence strcasecmp() for the exact matches and
+               // the "i" flag on the prefix patterns.
+               // NOTE(review): the spec appears to skip this quirks check as well
+               // for iframe srcdoc documents; only the limited-quirks branch tests
+               // isIframeSrcdoc here. Confirm before changing.
+               if ( $name !== 'html'
+                       || strcasecmp( $publicId, '-//W3O//DTD W3 HTML Strict 3.0//EN//' ) === 0
+                       || strcasecmp( $publicId, '-/W3C/DTD HTML 4.0 Transitional/EN' ) === 0
+                       || strcasecmp( $publicId, 'HTML' ) === 0
+                       || strcasecmp( $systemId,
+                               'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' ) === 0
+                       || ( $system === null && preg_match( $quirksIfNoSystem, $publicId ) )
+                       || preg_match( HTMLData::$quirkyPrefixRegex, $publicId )
+               ) {
+                       $quirks = TreeBuilder::QUIRKS;
+               } elseif ( !$this->builder->isIframeSrcdoc
+                       && (
+                               preg_match( $limitedQuirks, $publicId )
+                               || ( $system !== null && preg_match( $quirksIfNoSystem, $publicId ) )
+                       )
+               ) {
+                       $quirks = TreeBuilder::LIMITED_QUIRKS;
+               }
+
+               // The tree builder receives empty strings in place of missing parts
+               $name = $name === null ? '' : $name;
+               $this->builder->doctype( $name, $publicId, $systemId, $quirks,
+                       $sourceStart, $sourceLength );
+               $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML );
+       }
+
+       /**
+        * Handle the end of the document when no doctype has been seen.
+        *
+        * Unless the document is an iframe srcdoc document, this reports a
+        * missing doctype and sets quirks mode. The end-of-document event is
+        * then passed on to the "before html" mode.
+        *
+        * @param int $pos The position passed to the error handler and on to
+        *   the next mode
+        */
+       public function endDocument( $pos ) {
+               if ( !$this->builder->isIframeSrcdoc ) {
+                       $this->error( 'missing doctype', $pos );
+                       $this->builder->quirks = TreeBuilder::QUIRKS;
+               }
+               $this->dispatcher->switchMode( Dispatcher::BEFORE_HTML )
+                       ->endDocument( $pos );
+       }
+}