]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - includes/tidy/Balancer.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / includes / tidy / Balancer.php
1 <?php
2 /**
3  * An implementation of the tree building portion of the HTML5 parsing
4  * spec.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19  * http://www.gnu.org/copyleft/gpl.html
20  *
21  * @file
22  * @ingroup Parser
23  * @since 1.27
24  * @author C. Scott Ananian, 2016
25  */
26 namespace MediaWiki\Tidy;
27
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34
35 // A note for future librarization[1] -- this file is a good candidate
36 // for splitting into an independent library, except that it is currently
37 // highly optimized for MediaWiki use.  It only implements the portions
38 // of the HTML5 tree builder used by tags supported by MediaWiki, and
39 // does not contain a true tokenizer pass, instead relying on
40 // comment stripping, attribute normalization, and escaping done by
41 // the MediaWiki Sanitizer.  It also deliberately avoids building
42 // a true DOM in memory, instead serializing elements to an output string
43 // as soon as possible (usually as soon as the tag is closed) to reduce
44 // its memory footprint.
45
46 // We've been gradually lifting some of these restrictions to handle
47 // non-sanitized output generated by extensions, but we shortcut the tokenizer
48 // for speed (primarily by splitting on `<`) and so rely on syntactic
49 // well-formedness.
50
51 // On the other hand, I've been pretty careful to note with comments in the
52 // code the places where this implementation omits features of the spec or
53 // depends on the MediaWiki Sanitizer.  Perhaps in the future we'll want to
54 // implement the missing pieces and make this a standalone PHP HTML5 parser.
55 // In order to do so, some sort of MediaWiki-specific API will need
56 // to be added to (a) allow the Balancer to bypass the tokenizer,
57 // and (b) support on-the-fly flattening instead of DOM node creation.
58
59 // [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60
61 /**
62  * Utility constants and sets for the HTML5 tree building algorithm.
63  * Sets are associative arrays indexed first by namespace and then by
64  * lower-cased tag name.
65  *
66  * @ingroup Parser
67  * @since 1.27
68  */
class BalanceSets {
        const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
        const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
        const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';

        // Every set below has the same shape:
        //   [ namespace URI => [ lower-cased tag name => true ] ]
        // which is the shape BalanceElement::isA() tests membership against.

        /** HTML tags listed as unsupported. */
        public static $unsupportedSet = [
                self::HTML_NAMESPACE => [
                        'html' => true,
                        'head' => true,
                        'body' => true,
                        'frameset' => true,
                        'frame' => true,
                        'plaintext' => true,
                        'xmp' => true,
                        'iframe' => true,
                        'noembed' => true,
                        'noscript' => true,
                        'script' => true,
                        'title' => true
                ]
        ];

        /**
         * Elements which never have children; BalanceElement::__toString()
         * serializes members of this set as a single self-closed tag.
         */
        public static $emptyElementSet = [
                self::HTML_NAMESPACE => [
                        'area' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
                        'br' => true, 'col' => true, 'command' => true, 'embed' => true,
                        'frame' => true, 'hr' => true, 'img' => true, 'input' => true,
                        'keygen' => true, 'link' => true, 'meta' => true, 'param' => true,
                        'source' => true, 'track' => true, 'wbr' => true
                ]
        ];

        /**
         * Elements for which BalanceElement::flatten() doubles a leading
         * linefeed when running in tidy-compatibility mode.
         */
        public static $extraLinefeedSet = [
                self::HTML_NAMESPACE => [
                        'pre' => true,
                        'textarea' => true,
                        'listing' => true,
                ]
        ];

        /** The six HTML heading elements. */
        public static $headingSet = [
                self::HTML_NAMESPACE => [
                        'h1' => true, 'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
                        'h6' => true
                ]
        ];

        /** Presumably the HTML5 "special" element category -- confirm against the spec. */
        public static $specialSet = [
                self::HTML_NAMESPACE => [
                        'address' => true, 'applet' => true, 'area' => true, 'article' => true,
                        'aside' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
                        'blockquote' => true, 'body' => true, 'br' => true, 'button' => true,
                        'caption' => true, 'center' => true, 'col' => true, 'colgroup' => true,
                        'dd' => true, 'details' => true, 'dir' => true, 'div' => true,
                        'dl' => true, 'dt' => true, 'embed' => true, 'fieldset' => true,
                        'figcaption' => true, 'figure' => true, 'footer' => true, 'form' => true,
                        'frame' => true, 'frameset' => true, 'h1' => true, 'h2' => true,
                        'h3' => true, 'h4' => true, 'h5' => true, 'h6' => true,
                        'head' => true, 'header' => true, 'hgroup' => true, 'hr' => true,
                        'html' => true, 'iframe' => true, 'img' => true, 'input' => true,
                        'li' => true, 'link' => true, 'listing' => true, 'main' => true,
                        'marquee' => true, 'menu' => true, 'meta' => true, 'nav' => true,
                        'noembed' => true, 'noframes' => true, 'noscript' => true, 'object' => true,
                        'ol' => true, 'p' => true, 'param' => true, 'plaintext' => true,
                        'pre' => true, 'script' => true, 'section' => true, 'select' => true,
                        'source' => true, 'style' => true, 'summary' => true, 'table' => true,
                        'tbody' => true, 'td' => true, 'template' => true, 'textarea' => true,
                        'tfoot' => true, 'th' => true, 'thead' => true, 'title' => true,
                        'tr' => true, 'track' => true, 'ul' => true, 'wbr' => true,
                        'xmp' => true
                ],
                self::SVG_NAMESPACE => [
                        'foreignobject' => true,
                        'desc' => true,
                        'title' => true
                ],
                self::MATHML_NAMESPACE => [
                        'mi' => true, 'mo' => true, 'mn' => true,
                        'ms' => true, 'mtext' => true, 'annotation-xml' => true
                ]
        ];

        /** The three tags address, div and p. */
        public static $addressDivPSet = [
                self::HTML_NAMESPACE => [
                        'address' => true,
                        'div' => true,
                        'p' => true
                ]
        ];

        /** The table element together with its section and row elements. */
        public static $tableSectionRowSet = [
                self::HTML_NAMESPACE => [
                        'table' => true,
                        'thead' => true,
                        'tbody' => true,
                        'tfoot' => true,
                        'tr' => true
                ]
        ];

        /** Presumably the tags closed by "generate implied end tags" -- confirm. */
        public static $impliedEndTagsSet = [
                self::HTML_NAMESPACE => [
                        'dd' => true, 'dt' => true, 'li' => true, 'menuitem' => true,
                        'optgroup' => true, 'option' => true, 'p' => true, 'rb' => true,
                        'rp' => true, 'rt' => true, 'rtc' => true
                ]
        ];

        /** Presumably the "thorough" variant of the implied end tag list -- confirm. */
        public static $thoroughImpliedEndTagsSet = [
                self::HTML_NAMESPACE => [
                        'caption' => true, 'colgroup' => true, 'dd' => true,
                        'dt' => true, 'li' => true, 'optgroup' => true,
                        'option' => true, 'p' => true, 'rb' => true,
                        'rp' => true, 'rt' => true, 'rtc' => true,
                        'tbody' => true, 'td' => true, 'tfoot' => true,
                        'th' => true, 'thead' => true, 'tr' => true
                ]
        ];

        /** The two table cell elements. */
        public static $tableCellSet = [
                self::HTML_NAMESPACE => [
                        'td' => true,
                        'th' => true
                ]
        ];

        /** table, template and html. */
        public static $tableContextSet = [
                self::HTML_NAMESPACE => [
                        'table' => true,
                        'template' => true,
                        'html' => true
                ]
        ];

        /** The table section elements plus template and html. */
        public static $tableBodyContextSet = [
                self::HTML_NAMESPACE => [
                        'tbody' => true,
                        'tfoot' => true,
                        'thead' => true,
                        'template' => true,
                        'html' => true
                ]
        ];

        /** tr, template and html. */
        public static $tableRowContextSet = [
                self::HTML_NAMESPACE => [
                        'tr' => true,
                        'template' => true,
                        'html' => true
                ]
        ];

        // See https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
        public static $formAssociatedSet = [
                self::HTML_NAMESPACE => [
                        'button' => true, 'fieldset' => true, 'input' => true,
                        'keygen' => true, 'object' => true, 'output' => true,
                        'select' => true, 'textarea' => true, 'img' => true
                ]
        ];

        /**
         * Base scope set; the list-item and button scope sets returned by
         * inListItemScopeSet() and inButtonScopeSet() are derived from it.
         */
        public static $inScopeSet = [
                self::HTML_NAMESPACE => [
                        'applet' => true, 'caption' => true, 'html' => true,
                        'marquee' => true, 'object' => true, 'table' => true,
                        'td' => true, 'template' => true, 'th' => true
                ],
                self::SVG_NAMESPACE => [
                        'foreignobject' => true,
                        'desc' => true,
                        'title' => true
                ],
                self::MATHML_NAMESPACE => [
                        'mi' => true, 'mo' => true, 'mn' => true,
                        'ms' => true, 'mtext' => true, 'annotation-xml' => true
                ]
        ];

        /** Cache for inListItemScopeSet(); null until first requested. */
        private static $inListItemScopeSet = null;

        /**
         * Return $inScopeSet extended with the HTML 'ol' and 'ul' tags.
         * The result is built on the first call and cached afterwards.
         *
         * @return array
         */
        public static function inListItemScopeSet() {
                if ( self::$inListItemScopeSet !== null ) {
                        return self::$inListItemScopeSet;
                }
                $scope = self::$inScopeSet;
                $scope[self::HTML_NAMESPACE]['ol'] = true;
                $scope[self::HTML_NAMESPACE]['ul'] = true;
                self::$inListItemScopeSet = $scope;
                return $scope;
        }

        /** Cache for inButtonScopeSet(); null until first requested. */
        private static $inButtonScopeSet = null;

        /**
         * Return $inScopeSet extended with the HTML 'button' tag.
         * The result is built on the first call and cached afterwards.
         *
         * @return array
         */
        public static function inButtonScopeSet() {
                if ( self::$inButtonScopeSet !== null ) {
                        return self::$inButtonScopeSet;
                }
                $scope = self::$inScopeSet;
                $scope[self::HTML_NAMESPACE]['button'] = true;
                self::$inButtonScopeSet = $scope;
                return $scope;
        }

        /** html, table and template. */
        public static $inTableScopeSet = [
                self::HTML_NAMESPACE => [
                        'html' => true,
                        'table' => true,
                        'template' => true
                ]
        ];

        /** option and optgroup. */
        public static $inInvertedSelectScopeSet = [
                self::HTML_NAMESPACE => [
                        'option' => true,
                        'optgroup' => true
                ]
        ];

        /** Backs BalanceElement::isMathmlTextIntegrationPoint(). */
        public static $mathmlTextIntegrationPointSet = [
                self::MATHML_NAMESPACE => [
                        'mi' => true, 'mo' => true, 'mn' => true,
                        'ms' => true, 'mtext' => true
                ]
        ];

        /**
         * SVG elements which BalanceElement::isHtmlIntegrationPoint() accepts
         * without looking at their attributes.
         */
        public static $htmlIntegrationPointSet = [
                self::SVG_NAMESPACE => [
                        'foreignobject' => true, 'desc' => true, 'title' => true
                ]
        ];

        // For tidy compatibility.
        public static $tidyPWrapSet = [
                self::HTML_NAMESPACE => [
                        'body' => true,
                        'blockquote' => true,
                        // We parse with <body> as the fragment context, but the top-level
                        // element on the stack is actually <html>.  We could use the
                        // "adjusted current node" everywhere to work around this, but it's
                        // easier just to add <html> to the p-wrap set.
                        'html' => true,
                ],
        ];

        // Also for tidy compatibility.
        public static $tidyInlineSet = [
                self::HTML_NAMESPACE => [
                        'a' => true, 'abbr' => true, 'acronym' => true,
                        'applet' => true, 'b' => true, 'basefont' => true,
                        'bdo' => true, 'big' => true, 'br' => true,
                        'button' => true, 'cite' => true, 'code' => true,
                        'dfn' => true, 'em' => true, 'font' => true,
                        'i' => true, 'iframe' => true, 'img' => true,
                        'input' => true, 'kbd' => true, 'label' => true,
                        'legend' => true, 'map' => true, 'object' => true,
                        'param' => true, 'q' => true, 'rb' => true,
                        'rbc' => true, 'rp' => true, 'rt' => true,
                        'rtc' => true, 'ruby' => true, 's' => true,
                        'samp' => true, 'select' => true, 'small' => true,
                        'span' => true, 'strike' => true, 'strong' => true,
                        'sub' => true, 'sup' => true, 'textarea' => true,
                        'tt' => true, 'u' => true, 'var' => true,
                ],
        ];
}
299
300 /**
301  * A BalanceElement is a simplified version of a DOM Node.  The main
302  * difference is that we only keep BalanceElements around for nodes
303  * currently on the BalanceStack of open elements.  As soon as an
304  * element is closed, with some minor exceptions relating to the
305  * tree builder "adoption agency algorithm", the element and all its
306  * children are serialized to a string using the flatten() method.
307  * This keeps our memory usage low.
308  *
309  * @ingroup Parser
310  * @since 1.27
311  */
class BalanceElement {
        /**
         * The namespace of the element.
         * @var string $namespaceURI
         */
        public $namespaceURI;
        /**
         * The lower-cased name of the element.
         * @var string $localName
         */
        public $localName;
        /**
         * Attributes for the element, in array form
         * @var array $attribs
         */
        public $attribs;

        /**
         * Initialised to the empty string by the constructor and not read
         * anywhere in this class.  Declared explicitly so that constructing
         * an element does not create a dynamic property (deprecated since
         * PHP 8.2).
         * @var string $contents
         */
        public $contents;

        /**
         * Parent of this element, or the string "flat" if this element has
         * already been flattened into its parent.
         * @var BalanceElement|string|null $parent
         */
        public $parent;

        /**
         * An array of children of this element.  Typically only the last
         * child will be an actual BalanceElement object; the rest will
         * be strings, representing either text nodes or flattened
         * BalanceElement objects.
         * @var BalanceElement[]|string[] $children
         */
        public $children;

        /**
         * A unique string identifier for Noah's Ark purposes, lazy initialized
         * @var string|null $noahKey
         */
        private $noahKey;

        /**
         * The next active formatting element in the list, or null if this is the
         * end of the AFE list or if the element is not in the AFE list.
         */
        public $nextAFE;

        /**
         * The previous active formatting element in the list, or null if this is
         * the start of the list or if the element is not in the AFE list.
         */
        public $prevAFE;

        /**
         * The next element in the Noah's Ark species bucket.
         */
        public $nextNoah;

        /**
         * Make a new BalanceElement corresponding to the HTML DOM Element
         * with the given localname, namespace, and attributes.  The new
         * element starts out with no parent and no children.
         *
         * @param string $namespaceURI The namespace of the element.
         * @param string $localName The lowercased name of the tag.
         * @param array $attribs Attributes of the element
         */
        public function __construct( $namespaceURI, $localName, array $attribs ) {
                $this->localName = $localName;
                $this->namespaceURI = $namespaceURI;
                $this->attribs = $attribs;
                $this->contents = '';
                $this->parent = null;
                $this->children = [];
        }

        /**
         * Remove the given child from this element and clear its parent link.
         * @param BalanceElement $elt
         */
        private function removeChild( BalanceElement $elt ) {
                // The message must stay a constant string: it is built before
                // the assertion runs, so interpolating $this here would
                // serialize the whole subtree via __toString() on every call.
                Assert::precondition(
                        $this->parent !== 'flat', "Can't removeChild after flattening."
                );
                Assert::parameter(
                        $elt->parent === $this, '$elt', 'must have $this as a parent'
                );
                $idx = array_search( $elt, $this->children, true );
                Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
                $elt->parent = null;
                array_splice( $this->children, $idx, 1 );
        }

        /**
         * Find $a in the list of children and insert $b before it.  If $b is
         * an element which already has a parent, it is detached from that
         * parent first.
         * @param BalanceElement $a
         * @param BalanceElement|string $b
         */
        public function insertBefore( BalanceElement $a, $b ) {
                Assert::precondition(
                        $this->parent !== 'flat', "Can't insertBefore after flattening."
                );
                $idx = array_search( $a, $this->children, true );
                Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
                if ( is_string( $b ) ) {
                        array_splice( $this->children, $idx, 0, [ $b ] );
                        return;
                }
                Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
                if ( $b->parent !== null ) {
                        $b->parent->removeChild( $b );
                }
                array_splice( $this->children, $idx, 0, [ $b ] );
                $b->parent = $this;
        }

        /**
         * Append $elt to the end of the list of children.  If $elt is an
         * element which already has a parent, it is detached from that
         * parent first.
         * @param BalanceElement|string $elt
         */
        public function appendChild( $elt ) {
                Assert::precondition(
                        $this->parent !== 'flat', "Can't appendChild after flattening."
                );
                if ( is_string( $elt ) ) {
                        array_push( $this->children, $elt );
                        return;
                }
                // Same guard as insertBefore(): without it a flattened element
                // would reach the removeChild() call below with the string
                // 'flat' as its "parent".
                Assert::parameter( $elt->parent !== 'flat', '$elt', "Can't be flat" );
                // Remove $elt from parent, if it had one.
                if ( $elt->parent !== null ) {
                        $elt->parent->removeChild( $elt );
                }
                array_push( $this->children, $elt );
                $elt->parent = $this;
        }

        /**
         * Transfer all of the children of $elt to $this, leaving $elt with
         * no children.
         * @param BalanceElement $elt
         */
        public function adoptChildren( BalanceElement $elt ) {
                Assert::precondition(
                        $elt->parent !== 'flat', "Can't adoptChildren after flattening."
                );
                foreach ( $elt->children as $child ) {
                        if ( !is_string( $child ) ) {
                                // This is an optimization which avoids an O(n^2) set of
                                // array_splice operations.
                                $child->parent = null;
                        }
                        $this->appendChild( $child );
                }
                $elt->children = [];
        }

        /**
         * Flatten this node and all of its children into a string, as specified
         * by the HTML serialization specification, and replace this node
         * in its parent by that string.  Afterwards $this->parent is the
         * string 'flat'.
         *
         * When $config['tidyCompat'] is set, element children are flattened
         * first, 'mw:p-wrap' elements are renamed to 'p', attribute-less
         * blank 'tr' and 'li' elements get the 'mw-empty-elt' class, and a
         * blank 'mw:p-wrap' element flattens to the empty string.
         *
         * @param array $config Balancer configuration; see Balancer::__construct().
         * @return string
         *
         * @see __toString()
         */
        public function flatten( array $config ) {
                Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
                Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
                $idx = array_search( $this, $this->parent->children, true );
                Assert::parameter(
                        $idx !== false, '$this', 'must be a child of its parent'
                );
                $tidyCompat = $config['tidyCompat'];
                if ( $tidyCompat ) {
                        // Flatten every element child (each one replaces itself
                        // with a string in $this->children) while checking
                        // whether anything other than whitespace is present.
                        $blank = true;
                        foreach ( $this->children as $elt ) {
                                if ( !is_string( $elt ) ) {
                                        $elt = $elt->flatten( $config );
                                }
                                if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
                                        $blank = false;
                                }
                        }
                        if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
                                $this->localName = 'p';
                        } elseif ( $blank ) {
                                // Add 'mw-empty-elt' class so elements can be hidden via CSS
                                // for compatibility with legacy tidy.
                                if ( !count( $this->attribs ) &&
                                        ( $this->localName === 'tr' || $this->localName === 'li' )
                                ) {
                                        $this->attribs = [ 'class' => "mw-empty-elt" ];
                                }
                                // Blank elements other than p-wrappers are still emitted.
                                $blank = false;
                        } elseif (
                                $this->isA( BalanceSets::$extraLinefeedSet ) &&
                                count( $this->children ) > 0 &&
                                substr( $this->children[0], 0, 1 ) === "\n"
                        ) {
                                // Double the linefeed after pre/listing/textarea
                                // according to the (old) HTML5 fragment serialization
                                // algorithm (see https://github.com/whatwg/html/issues/944)
                                // to ensure this will round-trip.
                                array_unshift( $this->children, "\n" );
                        }
                        // Only a blank p-wrapper can still have $blank set here.
                        $flat = $blank ? '' : "{$this}";
                } else {
                        $flat = "{$this}";
                }
                $this->parent->children[$idx] = $flat;
                $this->parent = 'flat'; // for assertion checking
                return $flat;
        }

        /**
         * Serialize this node and all of its children to a string, as specified
         * by the HTML serialization specification.  Attribute values are
         * encoded with Sanitizer::encodeAttribute(); members of
         * BalanceSets::$emptyElementSet are written as a single self-closed
         * tag and must not have children.
         *
         * @return string The serialization of the BalanceElement
         * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
         */
        public function __toString() {
                $encAttribs = '';
                foreach ( $this->attribs as $name => $value ) {
                        $encValue = Sanitizer::encodeAttribute( $value );
                        $encAttribs .= " $name=\"$encValue\"";
                }
                if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
                        $out = "<{$this->localName}{$encAttribs}>";
                        // flatten children
                        foreach ( $this->children as $elt ) {
                                $out .= "{$elt}";
                        }
                        $out .= "</{$this->localName}>";
                } else {
                        $out = "<{$this->localName}{$encAttribs} />";
                        Assert::invariant(
                                count( $this->children ) === 0,
                                "Empty elements shouldn't have children."
                        );
                }
                return $out;
        }

        // Utility functions on BalanceElements.

        /**
         * Determine if $this represents a specific HTML tag, is a member of
         * a tag set, or is equal to another BalanceElement.
         *
         * @param BalanceElement|array|string $set The target BalanceElement,
         *   set (from the BalanceSets class), or string (HTML tag name).
         * @return bool
         */
        public function isA( $set ) {
                if ( $set instanceof BalanceElement ) {
                        // Identity comparison, not structural equality.
                        return $this === $set;
                } elseif ( is_array( $set ) ) {
                        return isset( $set[$this->namespaceURI] ) &&
                                isset( $set[$this->namespaceURI][$this->localName] );
                } else {
                        // assume this is an HTML element name.
                        return $this->isHtml() && $this->localName === $set;
                }
        }

        /**
         * Determine if this element is an HTML element with the specified name
         * @param string $tagName
         * @return bool
         */
        public function isHtmlNamed( $tagName ) {
                return $this->namespaceURI === BalanceSets::HTML_NAMESPACE
                        && $this->localName === $tagName;
        }

        /**
         * Determine if $this represents an element in the HTML namespace.
         *
         * @return bool
         */
        public function isHtml() {
                return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
        }

        /**
         * Determine if $this represents a MathML text integration point,
         * as defined in the HTML5 specification.
         *
         * @return bool
         * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
         */
        public function isMathmlTextIntegrationPoint() {
                return $this->isA( BalanceSets::$mathmlTextIntegrationPointSet );
        }

        /**
         * Determine if $this represents an HTML integration point,
         * as defined in the HTML5 specification: either a member of
         * BalanceSets::$htmlIntegrationPointSet, or a MathML
         * 'annotation-xml' element whose 'encoding' attribute is
         * (case-insensitively) 'text/html' or 'application/xhtml+xml'.
         *
         * @return bool
         * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
         */
        public function isHtmlIntegrationPoint() {
                if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
                        return true;
                }
                if (
                        $this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
                        $this->localName === 'annotation-xml' &&
                        isset( $this->attribs['encoding'] ) &&
                        ( strcasecmp( $this->attribs['encoding'], 'text/html' ) === 0 ||
                        strcasecmp( $this->attribs['encoding'], 'application/xhtml+xml' ) === 0 )
                ) {
                        return true;
                }
                return false;
        }

        /**
         * Get a string key for the Noah's Ark algorithm.  The key is the
         * serialization of the namespace, the local name and the attributes
         * sorted by name, so it does not depend on attribute order.  It is
         * computed on first use and cached.
         * @return string
         */
        public function getNoahKey() {
                if ( $this->noahKey === null ) {
                        $attribs = $this->attribs;
                        ksort( $attribs );
                        $this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
                }
                return $this->noahKey;
        }
}
641
642 /**
643  * The "stack of open elements" as defined in the HTML5 tree builder
644  * spec.  This contains methods to ensure that content (start tags, text)
645  * are inserted at the correct place in the output string, and to
646  * flatten BalanceElements as they are closed to avoid holding onto
647  * a complete DOM tree for the document in memory.
648  *
649  * The stack defines a PHP iterator to traverse it in "reverse order",
650  * that is, the most-recently-added element is visited first in a
651  * foreach loop.
652  *
653  * @ingroup Parser
654  * @since 1.27
655  * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
656  */
657 class BalanceStack implements IteratorAggregate {
658         /**
659          * Backing storage for the stack.
660          * @var BalanceElement[] $elements
661          */
662         private $elements = [];
663         /**
664          * Foster parent mode determines how nodes are inserted into the
665          * stack.
666          * @var bool $fosterParentMode
667          * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
668          */
669         public $fosterParentMode = false;
670         /**
671          * Configuration options governing flattening.
672          * @var array $config
673          * @see Balancer::__construct()
674          */
675         private $config;
676         /**
677          * Reference to the current element
678          */
679         public $currentNode;
680
681         /**
682          * Create a new BalanceStack with a single BalanceElement on it,
683          * representing the root &lt;html&gt; node.
684          * @param array $config Balancer configuration; see Balancer::__construct().
685          */
686         public function __construct( array $config ) {
687                 // always a root <html> element on the stack
688                 array_push(
689                         $this->elements,
690                         new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] )
691                 );
692                 $this->currentNode = $this->elements[0];
693                 $this->config = $config;
694         }
695
696         /**
697          * Return a string representing the output of the tree builder:
698          * all the children of the root &lt;html&gt; node.
699          * @return string
700          */
701         public function getOutput() {
702                 // Don't include the outer '<html>....</html>'
703                 $out = '';
704                 foreach ( $this->elements[0]->children as $elt ) {
705                         $out .= is_string( $elt ) ? $elt :
706                                 $elt->flatten( $this->config );
707                 }
708                 return $out;
709         }
710
711         /**
712          * Insert a comment at the appropriate place for inserting a node.
713          * @param string $value Content of the comment.
714          * @return string
715          * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-comment
716          */
717         public function insertComment( $value ) {
718                 // Just another type of text node, except for tidy p-wrapping.
719                 return $this->insertText( '<!--' . $value . '-->', true );
720         }
721
722         /**
723          * Insert text at the appropriate place for inserting a node.
724          * @param string $value
725          * @param bool $isComment
726          * @return string
727          * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
728          */
729         public function insertText( $value, $isComment = false ) {
730                 if (
731                         $this->fosterParentMode &&
732                         $this->currentNode->isA( BalanceSets::$tableSectionRowSet )
733                 ) {
734                         $this->fosterParent( $value );
735                 } elseif (
736                         $this->config['tidyCompat'] && !$isComment &&
737                         $this->currentNode->isA( BalanceSets::$tidyPWrapSet )
738                 ) {
739                         $this->insertHTMLElement( 'mw:p-wrap', [] );
740                         return $this->insertText( $value );
741                 } else {
742                         $this->currentNode->appendChild( $value );
743                 }
744         }
745
746         /**
747          * Insert a BalanceElement at the appropriate place, pushing it
748          * on to the open elements stack.
749          * @param string $namespaceURI The element namespace
750          * @param string $tag The tag name
751          * @param string $attribs Normalized attributes, as a string.
752          * @return BalanceElement
753          * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
754          */
755         public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
756                 return $this->insertElement(
757                         new BalanceElement( $namespaceURI, $tag, $attribs )
758                 );
759         }
760
761         /**
762          * Insert an HTML element at the appropriate place, pushing it on to
763          * the open elements stack.
764          * @param string $tag The tag name
765          * @param string $attribs Normalized attributes, as a string.
766          * @return BalanceElement
767          * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
768          */
769         public function insertHTMLElement( $tag, $attribs ) {
770                 return $this->insertForeignElement(
771                         BalanceSets::HTML_NAMESPACE, $tag, $attribs
772                 );
773         }
774
775         /**
776          * Insert an element at the appropriate place and push it on to the
777          * open elements stack.
778          * @param BalanceElement $elt
779          * @return BalanceElement
780          * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
781          */
782         public function insertElement( BalanceElement $elt ) {
783                 if (
784                         $this->currentNode->isHtmlNamed( 'mw:p-wrap' ) &&
785                         !$elt->isA( BalanceSets::$tidyInlineSet )
786                 ) {
787                         // Tidy compatibility.
788                         $this->pop();
789                 }
790                 if (
791                         $this->fosterParentMode &&
792                         $this->currentNode->isA( BalanceSets::$tableSectionRowSet )
793                 ) {
794                         $elt = $this->fosterParent( $elt );
795                 } else {
796                         $this->currentNode->appendChild( $elt );
797                 }
798                 Assert::invariant( $elt->parent !== null, "$elt must be in tree" );
799                 Assert::invariant( $elt->parent !== 'flat', "$elt must not have been previous flattened" );
800                 array_push( $this->elements, $elt );
801                 $this->currentNode = $elt;
802                 return $elt;
803         }
804
805         /**
806          * Determine if the stack has $tag in scope.
807          * @param BalanceElement|array|string $tag
808          * @return bool
809          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
810          */
811         public function inScope( $tag ) {
812                 return $this->inSpecificScope( $tag, BalanceSets::$inScopeSet );
813         }
814
815         /**
816          * Determine if the stack has $tag in button scope.
817          * @param BalanceElement|array|string $tag
818          * @return bool
819          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
820          */
821         public function inButtonScope( $tag ) {
822                 return $this->inSpecificScope( $tag, BalanceSets::inButtonScopeSet() );
823         }
824
825         /**
826          * Determine if the stack has $tag in list item scope.
827          * @param BalanceElement|array|string $tag
828          * @return bool
829          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
830          */
831         public function inListItemScope( $tag ) {
832                 return $this->inSpecificScope( $tag, BalanceSets::inListItemScopeSet() );
833         }
834
835         /**
836          * Determine if the stack has $tag in table scope.
837          * @param BalanceElement|array|string $tag
838          * @return bool
839          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
840          */
841         public function inTableScope( $tag ) {
842                 return $this->inSpecificScope( $tag, BalanceSets::$inTableScopeSet );
843         }
844
845         /**
846          * Determine if the stack has $tag in select scope.
847          * @param BalanceElement|array|string $tag
848          * @return bool
849          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope
850          */
851         public function inSelectScope( $tag ) {
852                 // Can't use inSpecificScope to implement this, since it involves
853                 // *inverting* a set of tags.  Implement manually.
854                 foreach ( $this as $elt ) {
855                         if ( $elt->isA( $tag ) ) {
856                                 return true;
857                         }
858                         if ( !$elt->isA( BalanceSets::$inInvertedSelectScopeSet ) ) {
859                                 return false;
860                         }
861                 }
862                 return false;
863         }
864
865         /**
866          * Determine if the stack has $tag in a specific scope, $set.
867          * @param BalanceElement|array|string $tag
868          * @param BalanceElement|array|string $set
869          * @return bool
870          * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
871          */
872         public function inSpecificScope( $tag, $set ) {
873                 foreach ( $this as $elt ) {
874                         if ( $elt->isA( $tag ) ) {
875                                 return true;
876                         }
877                         if ( $elt->isA( $set ) ) {
878                                 return false;
879                         }
880                 }
881                 return false;
882         }
883
884         /**
885          * Generate implied end tags.
886          * @param string $butnot
887          * @param bool $thorough True if we should generate end tags thoroughly.
888          * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
889          */
890         public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
891                 $endTagSet = $thorough ?
892                         BalanceSets::$thoroughImpliedEndTagsSet :
893                         BalanceSets::$impliedEndTagsSet;
894                 while ( $this->currentNode ) {
895                         if ( $butnot !== null && $this->currentNode->isHtmlNamed( $butnot ) ) {
896                                 break;
897                         }
898                         if ( !$this->currentNode->isA( $endTagSet ) ) {
899                                 break;
900                         }
901                         $this->pop();
902                 }
903         }
904
905         /**
906          * Return the adjusted current node.
907          * @param string $fragmentContext
908          * @return string
909          */
910         public function adjustedCurrentNode( $fragmentContext ) {
911                 return ( $fragmentContext && count( $this->elements ) === 1 ) ?
912                         $fragmentContext : $this->currentNode;
913         }
914
915         /**
916          * Return an iterator over this stack which visits the current node
917          * first, and the root node last.
918          * @return \Iterator
919          */
920         public function getIterator() {
921                 return new ReverseArrayIterator( $this->elements );
922         }
923
924         /**
925          * Return the BalanceElement at the given position $idx, where
926          * position 0 represents the root element.
927          * @param int $idx
928          * @return BalanceElement
929          */
930         public function node( $idx ) {
931                 return $this->elements[ $idx ];
932         }
933
934         /**
935          * Replace the element at position $idx in the BalanceStack with $elt.
936          * @param int $idx
937          * @param BalanceElement $elt
938          */
939         public function replaceAt( $idx, BalanceElement $elt ) {
940                 Assert::precondition(
941                         $this->elements[$idx]->parent !== 'flat',
942                         'Replaced element should not have already been flattened.'
943                 );
944                 Assert::precondition(
945                         $elt->parent !== 'flat',
946                         'New element should not have already been flattened.'
947                 );
948                 $this->elements[$idx] = $elt;
949                 if ( $idx === count( $this->elements ) - 1 ) {
950                         $this->currentNode = $elt;
951                 }
952         }
953
954         /**
955          * Return the position of the given BalanceElement, set, or
956          * HTML tag name string in the BalanceStack.
957          * @param BalanceElement|array|string $tag
958          * @return int
959          */
960         public function indexOf( $tag ) {
961                 for ( $i = count( $this->elements ) - 1; $i >= 0; $i-- ) {
962                         if ( $this->elements[$i]->isA( $tag ) ) {
963                                 return $i;
964                         }
965                 }
966                 return -1;
967         }
968
969         /**
970          * Return the number of elements currently in the BalanceStack.
971          * @return int
972          */
973         public function length() {
974                 return count( $this->elements );
975         }
976
977         /**
978          * Remove the current node from the BalanceStack, flattening it
979          * in the process.
980          */
981         public function pop() {
982                 $elt = array_pop( $this->elements );
983                 if ( count( $this->elements ) ) {
984                         $this->currentNode = $this->elements[ count( $this->elements ) - 1 ];
985                 } else {
986                         $this->currentNode = null;
987                 }
988                 if ( !$elt->isHtmlNamed( 'mw:p-wrap' ) ) {
989                         $elt->flatten( $this->config );
990                 }
991         }
992
993         /**
994          * Remove all nodes up to and including position $idx from the
995          * BalanceStack, flattening them in the process.
996          * @param int $idx
997          */
998         public function popTo( $idx ) {
999                 for ( $length = count( $this->elements ); $length > $idx; $length-- ) {
1000                         $this->pop();
1001                 }
1002         }
1003
1004         /**
1005          * Pop elements off the stack up to and including the first
1006          * element with the specified HTML tagname (or matching the given
1007          * set).
1008          * @param BalanceElement|array|string $tag
1009          */
1010         public function popTag( $tag ) {
1011                 while ( $this->currentNode ) {
1012                         if ( $this->currentNode->isA( $tag ) ) {
1013                                 $this->pop();
1014                                 break;
1015                         }
1016                         $this->pop();
1017                 }
1018         }
1019
1020         /**
1021          * Pop elements off the stack *not including* the first element
1022          * in the specified set.
1023          * @param BalanceElement|array|string $set
1024          */
1025         public function clearToContext( $set ) {
1026                 // Note that we don't loop to 0. Never pop the <html> elt off.
1027                 for ( $length = count( $this->elements ); $length > 1; $length-- ) {
1028                         if ( $this->currentNode->isA( $set ) ) {
1029                                 break;
1030                         }
1031                         $this->pop();
1032                 }
1033         }
1034
1035         /**
1036          * Remove the given $elt from the BalanceStack, optionally
1037          * flattening it in the process.
1038          * @param BalanceElement $elt The element to remove.
1039          * @param bool $flatten Whether to flatten the removed element.
1040          */
1041         public function removeElement( BalanceElement $elt, $flatten = true ) {
1042                 Assert::parameter(
1043                         $elt->parent !== 'flat',
1044                         '$elt',
1045                         '$elt should not already have been flattened.'
1046                 );
1047                 Assert::parameter(
1048                         $elt->parent->parent !== 'flat',
1049                         '$elt',
1050                         'The parent of $elt should not already have been flattened.'
1051                 );
1052                 $idx = array_search( $elt, $this->elements, true );
1053                 Assert::parameter( $idx !== false, '$elt', 'must be in stack' );
1054                 array_splice( $this->elements, $idx, 1 );
1055                 if ( $idx === count( $this->elements ) ) {
1056                         $this->currentNode = $this->elements[$idx - 1];
1057                 }
1058                 if ( $flatten ) {
1059                         // serialize $elt into its parent
1060                         // otherwise, it will eventually serialize when the parent
1061                         // is serialized, we just hold onto the memory for its
1062                         // tree of objects a little longer.
1063                         $elt->flatten( $this->config );
1064                 }
1065                 Assert::postcondition(
1066                         array_search( $elt, $this->elements, true ) === false,
1067                         '$elt should no longer be in open elements stack'
1068                 );
1069         }
1070
1071         /**
1072          * Find $a in the BalanceStack and insert $b after it.
1073          * @param BalanceElement $a
1074          * @param BalanceElement $b
1075          */
1076         public function insertAfter( BalanceElement $a, BalanceElement $b ) {
1077                 $idx = $this->indexOf( $a );
1078                 Assert::parameter( $idx !== false, '$a', 'must be in stack' );
1079                 if ( $idx === count( $this->elements ) - 1 ) {
1080                         array_push( $this->elements, $b );
1081                         $this->currentNode = $b;
1082                 } else {
1083                         array_splice( $this->elements, $idx + 1, 0, [ $b ] );
1084                 }
1085         }
1086
1087         // Fostering and adoption.
1088
1089         /**
1090          * Foster parent the given $elt in the stack of open elements.
1091          * @param BalanceElement|string $elt
1092          * @return BalanceElement|string
1093          *
1094          * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
1095          */
1096         private function fosterParent( $elt ) {
1097                 $lastTable = $this->indexOf( 'table' );
1098                 $lastTemplate = $this->indexOf( 'template' );
1099                 $parent = null;
1100                 $before = null;
1101
1102                 if ( $lastTemplate >= 0 && ( $lastTable < 0 || $lastTemplate > $lastTable ) ) {
1103                         $parent = $this->elements[$lastTemplate];
1104                 } elseif ( $lastTable >= 0 ) {
1105                         $parent = $this->elements[$lastTable]->parent;
1106                         // Assume all tables have parents, since we're not running scripts!
1107                         Assert::invariant(
1108                                 $parent !== null, "All tables should have parents"
1109                         );
1110                         $before = $this->elements[$lastTable];
1111                 } else {
1112                         $parent = $this->elements[0]; // the `html` element.
1113                 }
1114
1115                 if ( $this->config['tidyCompat'] ) {
1116                         if ( is_string( $elt ) ) {
1117                                 // We're fostering text: do we need a p-wrapper?
1118                                 if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
1119                                         $this->insertHTMLElement( 'mw:p-wrap', [] );
1120                                         $this->insertText( $elt );
1121                                         return $elt;
1122                                 }
1123                         } else {
1124                                 // We're fostering an element; do we need to merge p-wrappers?
1125                                 if ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
1126                                         $idx = $before ?
1127                                                 array_search( $before, $parent->children, true ) :
1128                                                 count( $parent->children );
1129                                         $after = $idx > 0 ? $parent->children[$idx - 1] : '';
1130                                         if (
1131                                                 $after instanceof BalanceElement &&
1132                                                 $after->isHtmlNamed( 'mw:p-wrap' )
1133                                         ) {
1134                                                 return $after; // Re-use existing p-wrapper.
1135                                         }
1136                                 }
1137                         }
1138                 }
1139
1140                 if ( $before ) {
1141                         $parent->insertBefore( $before, $elt );
1142                 } else {
1143                         $parent->appendChild( $elt );
1144                 }
1145                 return $elt;
1146         }
1147
1148         /**
1149          * Run the "adoption agency algorithm" (AAA) for the given subject
1150          * tag name.
1151          * @param string $tag The subject tag name.
1152          * @param BalanceActiveFormattingElements $afe The current
1153          *   active formatting elements list.
1154          * @return true if the adoption agency algorithm "did something", false
1155          *   if more processing is required by the caller.
1156          * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1157          */
1158         public function adoptionAgency( $tag, $afe ) {
1159                 // If the current node is an HTML element whose tag name is subject,
1160                 // and the current node is not in the list of active formatting
1161                 // elements, then pop the current node off the stack of open
1162                 // elements and abort these steps.
1163                 if (
1164                         $this->currentNode->isHtmlNamed( $tag ) &&
1165                         !$afe->isInList( $this->currentNode )
1166                 ) {
1167                         $this->pop();
1168                         return true; // no more handling required
1169                 }
1170
1171                 // Outer loop: If outer loop counter is greater than or
1172                 // equal to eight, then abort these steps.
1173                 for ( $outer = 0; $outer < 8; $outer++ ) {
1174                         // Let the formatting element be the last element in the list
1175                         // of active formatting elements that: is between the end of
1176                         // the list and the last scope marker in the list, if any, or
1177                         // the start of the list otherwise, and has the same tag name
1178                         // as the token.
1179                         $fmtElt = $afe->findElementByTag( $tag );
1180
1181                         // If there is no such node, then abort these steps and instead
1182                         // act as described in the "any other end tag" entry below.
1183                         if ( !$fmtElt ) {
1184                                 return false; // false means handle by the default case
1185                         }
1186
1187                         // Otherwise, if there is such a node, but that node is not in
1188                         // the stack of open elements, then this is a parse error;
1189                         // remove the element from the list, and abort these steps.
1190                         $index = $this->indexOf( $fmtElt );
1191                         if ( $index < 0 ) {
1192                                 $afe->remove( $fmtElt );
1193                                 return true;   // true means no more handling required
1194                         }
1195
1196                         // Otherwise, if there is such a node, and that node is also in
1197                         // the stack of open elements, but the element is not in scope,
1198                         // then this is a parse error; ignore the token, and abort
1199                         // these steps.
1200                         if ( !$this->inScope( $fmtElt ) ) {
1201                                 return true;
1202                         }
1203
1204                         // Let the furthest block be the topmost node in the stack of
1205                         // open elements that is lower in the stack than the formatting
1206                         // element, and is an element in the special category. There
1207                         // might not be one.
1208                         $furthestBlock = null;
1209                         $furthestBlockIndex = -1;
1210                         $stackLength = $this->length();
1211                         for ( $i = $index + 1; $i < $stackLength; $i++ ) {
1212                                 if ( $this->node( $i )->isA( BalanceSets::$specialSet ) ) {
1213                                         $furthestBlock = $this->node( $i );
1214                                         $furthestBlockIndex = $i;
1215                                         break;
1216                                 }
1217                         }
1218
1219                         // If there is no furthest block, then the UA must skip the
1220                         // subsequent steps and instead just pop all the nodes from the
1221                         // bottom of the stack of open elements, from the current node
1222                         // up to and including the formatting element, and remove the
1223                         // formatting element from the list of active formatting
1224                         // elements.
1225                         if ( !$furthestBlock ) {
1226                                 $this->popTag( $fmtElt );
1227                                 $afe->remove( $fmtElt );
1228                                 return true;
1229                         }
1230
1231                         // Let the common ancestor be the element immediately above
1232                         // the formatting element in the stack of open elements.
1233                         $ancestor = $this->node( $index - 1 );
1234
1235                         // Let a bookmark note the position of the formatting
1236                         // element in the list of active formatting elements
1237                         // relative to the elements on either side of it in the
1238                         // list.
1239                         $BOOKMARK = new BalanceElement( '[bookmark]', '[bookmark]', [] );
1240                         $afe->insertAfter( $fmtElt, $BOOKMARK );
1241
1242                         // Let node and last node be the furthest block.
1243                         $node = $furthestBlock;
1244                         $lastNode = $furthestBlock;
1245                         $nodeIndex = $furthestBlockIndex;
1246                         $isAFE = false;
1247
1248                         // Inner loop
1249                         for ( $inner = 1; true; $inner++ ) {
1250                                 // Let node be the element immediately above node in
1251                                 // the stack of open elements, or if node is no longer
1252                                 // in the stack of open elements (e.g. because it got
1253                                 // removed by this algorithm), the element that was
1254                                 // immediately above node in the stack of open elements
1255                                 // before node was removed.
1256                                 $node = $this->node( --$nodeIndex );
1257
1258                                 // If node is the formatting element, then go
1259                                 // to the next step in the overall algorithm.
1260                                 if ( $node === $fmtElt ) break;
1261
1262                                 // If the inner loop counter is greater than three and node
1263                                 // is in the list of active formatting elements, then remove
1264                                 // node from the list of active formatting elements.
1265                                 $isAFE = $afe->isInList( $node );
1266                                 if ( $inner > 3 && $isAFE ) {
1267                                         $afe->remove( $node );
1268                                         $isAFE = false;
1269                                 }
1270
1271                                 // If node is not in the list of active formatting
1272                                 // elements, then remove node from the stack of open
1273                                 // elements and then go back to the step labeled inner
1274                                 // loop.
1275                                 if ( !$isAFE ) {
1276                                         // Don't flatten here, since we're about to relocate
1277                                         // parts of this $node.
1278                                         $this->removeElement( $node, false );
1279                                         continue;
1280                                 }
1281
1282                                 // Create an element for the token for which the
1283                                 // element node was created with common ancestor as
1284                                 // the intended parent, replace the entry for node
1285                                 // in the list of active formatting elements with an
1286                                 // entry for the new element, replace the entry for
1287                                 // node in the stack of open elements with an entry for
1288                                 // the new element, and let node be the new element.
1289                                 $newElt = new BalanceElement(
1290                                         $node->namespaceURI, $node->localName, $node->attribs );
1291                                 $afe->replace( $node, $newElt );
1292                                 $this->replaceAt( $nodeIndex, $newElt );
1293                                 $node = $newElt;
1294
1295                                 // If last node is the furthest block, then move the
1296                                 // aforementioned bookmark to be immediately after the
1297                                 // new node in the list of active formatting elements.
1298                                 if ( $lastNode === $furthestBlock ) {
1299                                         $afe->remove( $BOOKMARK );
1300                                         $afe->insertAfter( $newElt, $BOOKMARK );
1301                                 }
1302
1303                                 // Insert last node into node, first removing it from
1304                                 // its previous parent node if any.
1305                                 $node->appendChild( $lastNode );
1306
1307                                 // Let last node be node.
1308                                 $lastNode = $node;
1309                         }
1310
1311                         // If the common ancestor node is a table, tbody, tfoot,
1312                         // thead, or tr element, then, foster parent whatever last
1313                         // node ended up being in the previous step, first removing
1314                         // it from its previous parent node if any.
1315                         if (
1316                                 $this->fosterParentMode &&
1317                                 $ancestor->isA( BalanceSets::$tableSectionRowSet )
1318                         ) {
1319                                 $this->fosterParent( $lastNode );
1320                         } else {
1321                                 // Otherwise, append whatever last node ended up being in
1322                                 // the previous step to the common ancestor node, first
1323                                 // removing it from its previous parent node if any.
1324                                 $ancestor->appendChild( $lastNode );
1325                         }
1326
1327                         // Create an element for the token for which the
1328                         // formatting element was created, with furthest block
1329                         // as the intended parent.
1330                         $newElt2 = new BalanceElement(
1331                                 $fmtElt->namespaceURI, $fmtElt->localName, $fmtElt->attribs );
1332
1333                         // Take all of the child nodes of the furthest block and
1334                         // append them to the element created in the last step.
1335                         $newElt2->adoptChildren( $furthestBlock );
1336
1337                         // Append that new element to the furthest block.
1338                         $furthestBlock->appendChild( $newElt2 );
1339
1340                         // Remove the formatting element from the list of active
1341                         // formatting elements, and insert the new element into the
1342                         // list of active formatting elements at the position of
1343                         // the aforementioned bookmark.
1344                         $afe->remove( $fmtElt );
1345                         $afe->replace( $BOOKMARK, $newElt2 );
1346
1347                         // Remove the formatting element from the stack of open
1348                         // elements, and insert the new element into the stack of
1349                         // open elements immediately below the position of the
1350                         // furthest block in that stack.
1351                         $this->removeElement( $fmtElt );
1352                         $this->insertAfter( $furthestBlock, $newElt2 );
1353                 }
1354
1355                 return true;
1356         }
1357
1358         /**
1359          * Return the contents of the open elements stack as a string for
1360          * debugging.
1361          * @return string
1362          */
public function __toString() {
        // Collect the local name of every entry in $this->elements, in
        // the order that array stores them.
        $names = [];
        foreach ( $this->elements as $elt ) {
                $names[] = $elt->localName;
        }
        // implode() takes the separator first.  The reversed legacy order
        // (array, separator) was deprecated in PHP 7.4 and removed in
        // PHP 8.0, where it throws a TypeError.
        return implode( ' ', $names );
}
1370 }
1371
1372 /**
1373  * A pseudo-element used as a marker in the list of active formatting elements
1374  *
1375  * @ingroup Parser
1376  * @since 1.27
1377  */
class BalanceMarker {
        /**
         * The entry following this marker in the list of active formatting
         * elements (the one inserted after it), or null.
         * @var BalanceElement|BalanceMarker|null
         */
        public $nextAFE;

        /**
         * The entry preceding this marker in the list of active formatting
         * elements, or null.
         * @var BalanceElement|BalanceMarker|null
         */
        public $prevAFE;

        /**
         * Markers never take part in a Noah's Ark bucket.  The property is
         * declared only because BalanceActiveFormattingElements::__destruct()
         * resets nextNoah on every list node, markers included; without a
         * declaration that assignment creates a dynamic property, which is
         * deprecated as of PHP 8.2.
         * @var null
         */
        public $nextNoah;
}
1382
1383 /**
1384  * The list of active formatting elements, which is used to handle
1385  * mis-nested formatting element tags in the HTML5 tree builder
1386  * specification.
1387  *
1388  * @ingroup Parser
1389  * @since 1.27
1390  * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
1391  */
class BalanceActiveFormattingElements {
        /**
         * The last (most recent) element in the list
         * @var BalanceElement|BalanceMarker|null
         */
        private $tail;

        /**
         * The first (least recent) element in the list
         * @var BalanceElement|BalanceMarker|null
         */
        private $head;

        /**
         * An array of arrays representing the population of elements in each bucket
         * according to the Noah's Ark clause. The outer array is stack-like, with each
         * integer-indexed element representing a segment of the list, bounded by
         * markers. The first element represents the segment of the list before the
         * first marker.
         *
         * The inner arrays are indexed by "Noah key", which is a string which uniquely
         * identifies each bucket according to the rules in the spec. The value in
         * the inner array is the first (least recently inserted) element in the bucket,
         * and subsequent members of the bucket can be found by iterating through the
         * singly-linked list via $node->nextNoah.
         *
         * This is optimised for the most common case of inserting into a bucket
         * with zero members, and deleting a bucket containing one member. In the
         * worst case, iteration through the list is still O(1) in the document
         * size, since each bucket can have at most 3 members.
         */
        private $noahTableStack = [ [] ];

        /**
         * Null out every link between list nodes (prevAFE, nextAFE and
         * nextNoah) as well as this object's own head, tail and Noah tables.
         */
        public function __destruct() {
                $next = null;
                for ( $node = $this->head; $node; $node = $next ) {
                        // Remember the successor before the link to it is cleared.
                        $next = $node->nextAFE;
                        $node->prevAFE = $node->nextAFE = $node->nextNoah = null;
                }
                $this->head = $this->tail = $this->noahTableStack = null;
        }

        /**
         * Append a marker to the end of the list and open a new, empty
         * Noah table for the segment which follows it.
         */
        public function insertMarker() {
                $this->appendNode( new BalanceMarker );
                $this->noahTableStack[] = [];
        }

        /**
         * Follow the steps required when the spec requires us to "push onto the
         * list of active formatting elements".
         * @param BalanceElement $elt
         * @throws ParameterAssertionException if $elt is already in the list
         */
        public function push( BalanceElement $elt ) {
                // Must not be in the list already
                if ( $elt->prevAFE !== null || $this->head === $elt ) {
                        throw new ParameterAssertionException( '$elt',
                                'Cannot insert a node into the AFE list twice' );
                }

                // "Noah's Ark clause" -- if there are already three copies of
                // this element before we encounter a marker, then drop the
                // earliest one (the head of the bucket).
                $noahKey = $elt->getNoahKey();
                $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
                if ( !isset( $table[$noahKey] ) ) {
                        $table[$noahKey] = $elt;
                } else {
                        $count = 1;
                        $head = $tail = $table[$noahKey];
                        while ( $tail->nextNoah ) {
                                $tail = $tail->nextNoah;
                                $count++;
                        }
                        if ( $count >= 3 ) {
                                // remove() also advances the bucket head in $table.
                                $this->remove( $head );
                        }
                        $tail->nextNoah = $elt;
                }
                // Add to the main AFE list
                $this->appendNode( $elt );
        }

        /**
         * Link a node (element or marker) onto the end of the doubly-linked
         * AFE list, updating the head and tail pointers.
         * @param BalanceElement|BalanceMarker $node
         */
        private function appendNode( $node ) {
                if ( $this->tail ) {
                        $this->tail->nextAFE = $node;
                        $node->prevAFE = $this->tail;
                } else {
                        // The list was empty, so the node is also the head.
                        $this->head = $node;
                }
                $this->tail = $node;
        }

        /**
         * Follow the steps required when the spec asks us to "clear the list of
         * active formatting elements up to the last marker".
         */
        public function clearToMarker() {
                // Iterate back through the list starting from the tail
                $tail = $this->tail;
                while ( $tail && !( $tail instanceof BalanceMarker ) ) {
                        // Unlink the element
                        $prev = $tail->prevAFE;
                        $tail->prevAFE = null;
                        if ( $prev ) {
                                $prev->nextAFE = null;
                        }
                        $tail->nextNoah = null;
                        $tail = $prev;
                }
                // If we finished on a marker, unlink it and pop it off the Noah table stack
                if ( $tail ) {
                        $prev = $tail->prevAFE;
                        if ( $prev ) {
                                $prev->nextAFE = null;
                        }
                        $tail = $prev;
                        array_pop( $this->noahTableStack );
                } else {
                        // No marker: wipe the top-level Noah table (which is the only one)
                        $this->noahTableStack[0] = [];
                }
                // If we removed all the elements, clear the head pointer
                if ( !$tail ) {
                        $this->head = null;
                }
                $this->tail = $tail;
        }

        /**
         * Find and return the last element with the specified tag between the
         * end of the list and the last marker on the list.
         * Used when parsing &lt;a&gt; "in body mode".
         * @param string $tag
         * @return BalanceElement|null
         */
        public function findElementByTag( $tag ) {
                $elt = $this->tail;
                while ( $elt && !( $elt instanceof BalanceMarker ) ) {
                        if ( $elt->localName === $tag ) {
                                return $elt;
                        }
                        $elt = $elt->prevAFE;
                }
                return null;
        }

        /**
         * Determine whether an element is in the list of formatting elements.
         * @param BalanceElement $elt
         * @return bool
         */
        public function isInList( BalanceElement $elt ) {
                // Every list member except the head has a non-null prevAFE.
                return $this->head === $elt || $elt->prevAFE;
        }

        /**
         * Find the element $elt in the list and remove it.
         * Used when parsing &lt;a&gt; in body mode.
         *
         * @param BalanceElement $elt
         * @throws ParameterAssertionException if $elt is not in the list
         */
        public function remove( BalanceElement $elt ) {
                if ( $this->head !== $elt && !$elt->prevAFE ) {
                        throw new ParameterAssertionException( '$elt',
                                "Attempted to remove an element which is not in the AFE list" );
                }
                // Update head and tail pointers
                if ( $this->head === $elt ) {
                        $this->head = $elt->nextAFE;
                }
                if ( $this->tail === $elt ) {
                        $this->tail = $elt->prevAFE;
                }
                // Update previous element
                if ( $elt->prevAFE ) {
                        $elt->prevAFE->nextAFE = $elt->nextAFE;
                }
                // Update next element
                if ( $elt->nextAFE ) {
                        $elt->nextAFE->prevAFE = $elt->prevAFE;
                }
                // Clear pointers so that isInList() etc. will work
                $elt->prevAFE = $elt->nextAFE = null;
                // Update Noah list
                $this->removeFromNoahList( $elt );
        }

        /**
         * Register $elt at the end of its Noah bucket in the table for the
         * current (most recent) segment of the list.
         * @param BalanceElement $elt
         */
        private function addToNoahList( BalanceElement $elt ) {
                $noahKey = $elt->getNoahKey();
                $table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
                if ( !isset( $table[$noahKey] ) ) {
                        $table[$noahKey] = $elt;
                } else {
                        $tail = $table[$noahKey];
                        while ( $tail->nextNoah ) {
                                $tail = $tail->nextNoah;
                        }
                        $tail->nextNoah = $elt;
                }
        }

        /**
         * Unlink $elt from the Noah bucket which contains it.
         *
         * The table for the most recent segment is checked first, then the
         * older ones, so an element which was pushed before a later marker is
         * still unregistered instead of leaving a stale bucket entry behind.
         * If no bucket contains $elt, nothing happens.
         *
         * @param BalanceElement $elt
         */
        private function removeFromNoahList( BalanceElement $elt ) {
                $key = $elt->getNoahKey();
                for ( $i = count( $this->noahTableStack ) - 1; $i >= 0; $i-- ) {
                        if ( !isset( $this->noahTableStack[$i][$key] ) ) {
                                continue;
                        }
                        $first = $this->noahTableStack[$i][$key];
                        if ( $first === $elt ) {
                                // $elt heads the bucket: promote its successor, or drop
                                // the bucket entirely if it was the only member.
                                if ( $elt->nextNoah ) {
                                        $this->noahTableStack[$i][$key] = $elt->nextNoah;
                                        $elt->nextNoah = null;
                                } else {
                                        unset( $this->noahTableStack[$i][$key] );
                                }
                                return;
                        }
                        for ( $prev = $first; $prev->nextNoah; $prev = $prev->nextNoah ) {
                                if ( $prev->nextNoah === $elt ) {
                                        // Found it, unlink
                                        $prev->nextNoah = $elt->nextNoah;
                                        $elt->nextNoah = null;
                                        return;
                                }
                        }
                }
        }

        /**
         * Find element $a in the list and replace it with element $b
         *
         * @param BalanceElement $a
         * @param BalanceElement $b
         * @throws ParameterAssertionException if $a is not in the list
         */
        public function replace( BalanceElement $a, BalanceElement $b ) {
                if ( $this->head !== $a && !$a->prevAFE ) {
                        throw new ParameterAssertionException( '$a',
                                "Attempted to replace an element which is not in the AFE list" );
                }
                // Update head and tail pointers
                if ( $this->head === $a ) {
                        $this->head = $b;
                }
                if ( $this->tail === $a ) {
                        $this->tail = $b;
                }
                // Update previous element
                if ( $a->prevAFE ) {
                        $a->prevAFE->nextAFE = $b;
                }
                // Update next element
                if ( $a->nextAFE ) {
                        $a->nextAFE->prevAFE = $b;
                }
                $b->prevAFE = $a->prevAFE;
                $b->nextAFE = $a->nextAFE;
                $a->nextAFE = $a->prevAFE = null;
                // Update Noah list
                $this->removeFromNoahList( $a );
                $this->addToNoahList( $b );
        }

        /**
         * Find $a in the list and insert $b after it.
         *
         * @param BalanceElement $a
         * @param BalanceElement $b
         * @throws ParameterAssertionException if $a is not in the list
         */
        public function insertAfter( BalanceElement $a, BalanceElement $b ) {
                if ( $this->head !== $a && !$a->prevAFE ) {
                        throw new ParameterAssertionException( '$a',
                                "Attempted to insert after an element which is not in the AFE list" );
                }
                if ( $this->tail === $a ) {
                        $this->tail = $b;
                }
                if ( $a->nextAFE ) {
                        $a->nextAFE->prevAFE = $b;
                }
                $b->nextAFE = $a->nextAFE;
                $b->prevAFE = $a;
                $a->nextAFE = $b;
                $this->addToNoahList( $b );
        }

        // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
        /**
         * Reconstruct the active formatting elements.
         * @param BalanceStack $stack The open elements stack
         * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
         */
        // @codingStandardsIgnoreEnd
        public function reconstruct( $stack ) {
                $entry = $this->tail;
                // If there are no entries in the list of active formatting elements,
                // then there is nothing to reconstruct
                if ( !$entry ) {
                        return;
                }
                // If the last is a marker, do nothing.
                if ( $entry instanceof BalanceMarker ) {
                        return;
                }
                // Or if it is an open element, do nothing.
                if ( $stack->indexOf( $entry ) >= 0 ) {
                        return;
                }

                // Loop backward through the list until we find a marker or an
                // open element
                $foundIt = false;
                while ( $entry->prevAFE ) {
                        $entry = $entry->prevAFE;
                        if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
                                $foundIt = true;
                                break;
                        }
                }

                // Now loop forward, starting from the element after the current one (or
                // the first element if we didn't find a marker or open element),
                // recreating formatting elements and pushing them back onto the list
                // of open elements.
                if ( $foundIt ) {
                        $entry = $entry->nextAFE;
                }
                do {
                        $newElement = $stack->insertHTMLElement(
                                $entry->localName,
                                $entry->attribs );
                        // The new element takes over the old entry's list position.
                        $this->replace( $entry, $newElement );
                        $entry = $newElement->nextAFE;
                } while ( $entry );
        }

        /**
         * Get a string representation of the AFE list, for debugging.
         *
         * One line is emitted per list entry, from head to tail: "MARKER"
         * for a marker, otherwise the local name plus a short hash of the
         * object identity.  Inconsistent links are flagged in the output.
         * @return string
         */
        public function __toString() {
                $prev = null;
                $s = '';
                for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
                        if ( $node instanceof BalanceMarker ) {
                                $s .= "MARKER\n";
                                continue;
                        }
                        $s .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
                        if ( $node->nextNoah ) {
                                $s .= " (noah sibling: {$node->nextNoah->localName}#" .
                                        substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
                                        ')';
                        }
                        if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
                                $s .= " (reverse link is wrong!)";
                        }
                        $s .= "\n";
                }
                // After the walk $prev is the last node visited, which must be the tail.
                if ( $prev !== $this->tail ) {
                        $s .= "(tail pointer is wrong!)\n";
                }
                return $s;
        }
}
1750
1751 /**
1752  * An implementation of the tree building portion of the HTML5 parsing
1753  * spec.
1754  *
1755  * This is used to balance and tidy output so that the result can
1756  * always be cleanly serialized/deserialized by an HTML5 parser.  It
1757  * does *not* guarantee "conforming" output -- the HTML5 spec contains
1758  * a number of constraints which are not enforced by the HTML5 parsing
1759  * process.  But the result will be free of gross errors: misnested or
1760  * unclosed tags, for example, and will be unchanged by spec-compliant
1761  * parsing followed by serialization.
1762  *
1763  * The tree building stage is structured as a state machine.
1764  * When comparing the implementation to
1765  * https://www.w3.org/TR/html5/syntax.html#tree-construction
1766  * note that each state is implemented as a function with a
1767  * name ending in `Mode` (because the HTML spec refers to them
1768  * as insertion modes).  The current insertion mode is held by
1769  * the $parseMode property.
1770  *
1771  * The following simplifications have been made:
1772  * - We handle body content only (ie, we start `in body`.)
1773  * - The document is never in "quirks mode".
1774  * - All occurrences of < and > have been entity escaped, so we
1775  *   can parse tags by simply splitting on those two characters.
1776  *   (This also simplifies the handling of < inside <textarea>.)
1777  *   The character < must not appear inside comments.
1778  *   Similarly, all attributes have been "cleaned" and are double-quoted
1779  *   and escaped.
1780  * - All null characters are assumed to have been removed.
1781  * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1782  *   <frame>, <plaintext>, <xmp>, <iframe>,
1783  *   <noembed>, <noscript>, <script>, <title>.  As a result,
1784  *   further simplifications can be made:
1785  *   - `frameset-ok` is not tracked.
1786  *   - `head element pointer` is not tracked (but presumed non-null)
1787  *   - Tokenizer has only a single mode. (<textarea> wants RCDATA and
1788  *     <style>/<noframes> want RAWTEXT modes which we only loosely emulate.)
1789  *
1790  *   We generally mark places where we omit cases from the spec due to
1791  *   disallowed elements with a comment: `// OMITTED: <element-name>`.
1792  *
1793  *   The HTML spec keeps a flag during the parsing process to track
1794  *   whether or not a "parse error" has been encountered.  We don't
1795  *   bother to track that flag, we just implement the error-handling
1796  *   process as specified.
1797  *
1798  * @ingroup Parser
1799  * @since 1.27
1800  * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1801  */
1802 class Balancer {
1803         private $parseMode;
1804         /** @var \Iterator */
1805         private $bitsIterator;
1806         private $allowedHtmlElements;
1807         /** @var BalanceActiveFormattingElements */
1808         private $afe;
1809         /** @var BalanceStack */
1810         private $stack;
1811         private $strict;
1812         private $allowComments;
1813         private $config;
1814
1815         private $textIntegrationMode;
1816         private $pendingTableText;
1817         private $originalInsertionMode;
1818         private $fragmentContext;
1819         private $formElementPointer;
1820         private $ignoreLinefeed;
1821         private $inRCDATA;
1822         private $inRAWTEXT;
1823
1824         /** @var callable|null */
1825         private $processingCallback;
1826         /** @var array */
1827         private $processingArgs;
1828
1829         /**
1830          * Valid HTML5 comments.
1831          * Regex borrowed from Tim Starling's "remex-html" project.
1832          */
1833         const VALID_COMMENT_REGEX = "~ !--
1834                 (                           # 1. Comment match detector
1835                         > | -> | # Invalid short close
1836                         (                         # 2. Comment contents
1837                                 (?:
1838                                         (?! --> )
1839                                         (?! --!> )
1840                                         (?! --! \z )
1841                                         (?! -- \z )
1842                                         (?! - \z )
1843                                         .
1844                                 )*+
1845                         )
1846                         (                         # 3. Comment close
1847                                 --> |   # Normal close
1848                                 --!> |  # Comment end bang
1849                                 (                       # 4. Indicate matches requiring EOF
1850                                         --! |                   # EOF in comment end bang state
1851                                         -- |                    # EOF in comment end state
1852                                         -  |                    # EOF in comment end dash state
1853                                         (?#nothing)             # EOF in comment state
1854                                 )
1855                         )
1856                 )
1857                 ([^<]*) \z                  # 5. Non-tag text after the comment
1858                 ~xs";
1859
1860         /**
1861          * Create a new Balancer.
1862          * @param array $config Balancer configuration.  Includes:
1863          *     'strict' : boolean, defaults to false.
1864          *         When true, enforces syntactic constraints on input:
1865          *         all non-tag '<' must be escaped, all attributes must be
1866          *         separated by a single space and double-quoted.  This is
1867          *         consistent with the output of the Sanitizer.
1868          *     'allowedHtmlElements' : array, defaults to null.
1869          *         When present, the keys of this associative array give
1870          *         the acceptable HTML tag names.  When not present, no
1871          *         tag sanitization is done.
1872          *     'tidyCompat' : boolean, defaults to false.
1873          *         When true, the serialization algorithm is tweaked to
1874          *         provide historical compatibility with the old "tidy"
1875          *         program: <p>-wrapping is done to the children of
1876          *         <body> and <blockquote> elements, and empty elements
1877          *         are removed.  The <pre>/<listing>/<textarea> serialization
1878          *         is also tweaked to allow lossless round trips.
1879          *         (See: https://github.com/whatwg/html/issues/944)
1880          *     'allowComments': boolean, defaults to true.
1881          *         When true, allows HTML comments in the input.
1882          *         The Sanitizer generally strips all comments, so if you
1883          *         are running on sanitized output you can set this to
1884          *         false to get a bit more performance.
1885          */
public function __construct( array $config = [] ) {
        // Array union keeps any caller-supplied value and fills in the
        // defaults for the keys which are missing.
        $this->config = $config = $config + [
                'strict' => false,
                'allowedHtmlElements' => null,
                'tidyCompat' => false,
                'allowComments' => true,
        ];
        $this->allowedHtmlElements = $config['allowedHtmlElements'];
        $this->strict = $config['strict'];
        $this->allowComments = $config['allowComments'];
        if ( $this->allowedHtmlElements !== null ) {
                // Sanity check!  Both arrays are keyed by tag name, so a key
                // intersection lists every allowed element which the balancer
                // does not support; the values are irrelevant.
                $bad = array_intersect_key(
                        $this->allowedHtmlElements,
                        BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
                );
                if ( count( $bad ) > 0 ) {
                        // Separator first: the reversed legacy argument order of
                        // implode() was removed in PHP 8.0.
                        $badstr = implode( ',', array_keys( $bad ) );
                        throw new ParameterAssertionException(
                                '$config',
                                'Balance attempted with sanitization including ' .
                                "unsupported elements: {$badstr}"
                        );
                }
        }
}
1917
1918         /**
1919          * Return a balanced HTML string for the HTML fragment given by $text,
1920          * subject to the caveats listed in the class description.  The result
1921          * will typically be idempotent -- that is, rebalancing the output
1922          * would result in no change.
1923          *
1924          * @param string $text The markup to be balanced
1925          * @param callable $processingCallback Callback to do any variable or
1926          *   parameter replacements in HTML attributes values
1927          * @param array|bool $processingArgs Arguments for the processing callback
1928          * @return string The balanced markup
1929          */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
        // Fresh per-run parser state.
        $this->parseMode = 'inBodyMode';
        $this->bitsIterator = new ExplodeIterator( '<', $text );
        $this->afe = new BalanceActiveFormattingElements();
        $this->stack = new BalanceStack( $this->config );
        $this->processingCallback = $processingCallback;
        $this->processingArgs = $processingArgs;

        // All tokenizer-emulation flags start out cleared.
        $this->inRAWTEXT = false;
        $this->inRCDATA = false;
        $this->ignoreLinefeed = false;
        $this->textIntegrationMode = false;

        // The stack is constructed with an <html> element already on it.
        // Set this up as a fragment parsed with <body> as the context.
        $this->fragmentContext =
                new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
        $this->resetInsertionMode();

        // Point the form element pointer at the nearest <form>, if any,
        // walking up from the fragment context.
        $this->formElementPointer = null;
        $candidate = $this->fragmentContext;
        while ( $candidate != null ) {
                if ( $candidate->isHtmlNamed( 'form' ) ) {
                        $this->formElementPointer = $candidate;
                        break;
                }
                $candidate = $candidate->parent;
        }

        // The piece before the first '<' is plain text, never a tag.
        $leadingText = $this->bitsIterator->current();
        $this->bitsIterator->next();
        $this->insertToken( 'text', str_replace( '>', '&gt;', $leadingText ) );

        // Every remaining piece begins with a tag; consume them in turn.
        while ( $this->bitsIterator->valid() ) {
                $this->advance();
        }
        $this->insertToken( 'eof', null );
        $balanced = $this->stack->getOutput();

        // Drop the references to the per-run objects before returning.
        $this->bitsIterator = null;
        $this->afe = null;
        $this->stack = null;
        $this->fragmentContext = null;
        $this->formElementPointer = null;

        return $balanced;
}
1974
             /**
              * Pass a token to the tree builder.  The $token will be one of the
              * strings "tag", "endtag", "text", "comment", or "eof".
              */
1979         private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
1980                 // validate tags against $unsupportedSet
1981                 if ( $token === 'tag' || $token === 'endtag' ) {
1982                         if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
1983                                 // As described in "simplifications" above, these tags are
1984                                 // not supported in the balancer.
1985                                 Assert::invariant(
1986                                         !$this->strict,
1987                                         "Unsupported $token <$value> found."
1988                                 );
1989                                 return false;
1990                         }
1991                 } elseif ( $token === 'text' && $value === '' ) {
1992                         // Don't actually inject the empty string as a text token.
1993                         return true;
1994                 }
1995                 // Support pre/listing/textarea by suppressing initial linefeed
1996                 if ( $this->ignoreLinefeed ) {
1997                         $this->ignoreLinefeed = false;
1998                         if ( $token === 'text' ) {
1999                                 if ( $value[0] === "\n" ) {
2000                                         if ( $value === "\n" ) {
2001                                                 // Nothing would be left, don't inject the empty string.
2002                                                 return true;
2003                                         }
2004                                         $value = substr( $value, 1 );
2005                                 }
2006                         }
2007                 }
2008                 // Some hoops we have to jump through
2009                 $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
2010
2011                 // The spec calls this the "tree construction dispatcher".
2012                 $isForeign = true;
2013                 if (
2014                         $this->stack->length() === 0 ||
2015                         $adjusted->isHtml() ||
2016                         $token === 'eof'
2017                 ) {
2018                         $isForeign = false;
2019                 } elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
2020                         if ( $token === 'text' ) {
2021                                 $isForeign = false;
2022                         } elseif (
2023                                 $token === 'tag' &&
2024                                 $value !== 'mglyph' && $value !== 'malignmark'
2025                         ) {
2026                                 $isForeign = false;
2027                         }
2028                 } elseif (
2029                         $adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
2030                         $adjusted->localName === 'annotation-xml' &&
2031                         $token === 'tag' && $value === 'svg'
2032                 ) {
2033                         $isForeign = false;
2034                 } elseif (
2035                         $adjusted->isHtmlIntegrationPoint() &&
2036                         ( $token === 'tag' || $token === 'text' )
2037                 ) {
2038                         $isForeign = false;
2039                 }
2040                 if ( $isForeign ) {
2041                         return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
2042                 } else {
2043                         $func = $this->parseMode;
2044                         return $this->$func( $token, $value, $attribs, $selfClose );
2045                 }
2046         }
2047
2048         private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
2049                 if ( $token === 'text' ) {
2050                         $this->stack->insertText( $value );
2051                         return true;
2052                 } elseif ( $token === 'comment' ) {
2053                         $this->stack->insertComment( $value );
2054                         return true;
2055                 } elseif ( $token === 'tag' ) {
2056                         switch ( $value ) {
2057                         case 'font':
2058                                 if ( isset( $attribs['color'] )
2059                                         || isset( $attribs['face'] )
2060                                         || isset( $attribs['size'] )
2061                                 ) {
2062                                         break;
2063                                 }
2064                                 // otherwise, fall through
2065                         case 'b':
2066                         case 'big':
2067                         case 'blockquote':
2068                         case 'body':
2069                         case 'br':
2070                         case 'center':
2071                         case 'code':
2072                         case 'dd':
2073                         case 'div':
2074                         case 'dl':
2075                         case 'dt':
2076                         case 'em':
2077                         case 'embed':
2078                         case 'h1':
2079                         case 'h2':
2080                         case 'h3':
2081                         case 'h4':
2082                         case 'h5':
2083                         case 'h6':
2084                         case 'head':
2085                         case 'hr':
2086                         case 'i':
2087                         case 'img':
2088                         case 'li':
2089                         case 'listing':
2090                         case 'menu':
2091                         case 'meta':
2092                         case 'nobr':
2093                         case 'ol':
2094                         case 'p':
2095                         case 'pre':
2096                         case 'ruby':
2097                         case 's':
2098                         case 'small':
2099                         case 'span':
2100                         case 'strong':
2101                         case 'strike':
2102                         case 'sub':
2103                         case 'sup':
2104                         case 'table':
2105                         case 'tt':
2106                         case 'u':
2107                         case 'ul':
2108                         case 'var':
2109                                 if ( $this->fragmentContext ) {
2110                                         break;
2111                                 }
2112                                 while ( true ) {
2113                                         $this->stack->pop();
2114                                         $node = $this->stack->currentNode;
2115                                         if (
2116                                                 $node->isMathmlTextIntegrationPoint() ||
2117                                                 $node->isHtmlIntegrationPoint() ||
2118                                                 $node->isHtml()
2119                                         ) {
2120                                                 break;
2121                                         }
2122                                 }
2123                                 return $this->insertToken( $token, $value, $attribs, $selfClose );
2124                         }
2125                         // "Any other start tag"
2126                         $adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
2127                                 $this->fragmentContext : $this->stack->currentNode;
2128                         $this->stack->insertForeignElement(
2129                                 $adjusted->namespaceURI, $value, $attribs
2130                         );
2131                         if ( $selfClose ) {
2132                                 $this->stack->pop();
2133                         }
2134                         return true;
2135                 } elseif ( $token === 'endtag' ) {
2136                         $first = true;
2137                         foreach ( $this->stack as $i => $node ) {
2138                                 if ( $node->isHtml() && !$first ) {
2139                                         // process the end tag as HTML
2140                                         $func = $this->parseMode;
2141                                         return $this->$func( $token, $value, $attribs, $selfClose );
2142                                 } elseif ( $i === 0 ) {
2143                                         return true;
2144                                 } elseif ( $node->localName === $value ) {
2145                                         $this->stack->popTag( $node );
2146                                         return true;
2147                                 }
2148                                 $first = false;
2149                         }
2150                 }
2151         }
2152
             /**
              * Grab the next "token" from $bitsIterator.  This is either an open/close
              * tag or text or a comment, depending on whether the Sanitizer approves.
              */
2157         private function advance() {
2158                 $x = $this->bitsIterator->current();
2159                 $this->bitsIterator->next();
2160                 $regs = [];
2161                 // Handle comments.  These won't be generated by mediawiki (they
2162                 // are stripped in the Sanitizer) but may be generated by extensions.
2163                 if (
2164                         $this->allowComments &&
2165                         !( $this->inRCDATA || $this->inRAWTEXT ) &&
2166                         preg_match( self::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
2167                         // verify EOF condition where necessary
2168                         ( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
2169                 ) {
2170                         $contents = $regs[2][0];
2171                         $rest = $regs[5][0];
2172                         $this->insertToken( 'comment', $contents );
2173                         $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2174                         return;
2175                 }
2176                 // $slash: Does the current element start with a '/'?
2177                 // $t: Current element name
2178                 // $attribStr: String between element name and >
2179                 // $brace: Ending '>' or '/>'
2180                 // $rest: Everything until the next element from the $bitsIterator
2181                 if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
2182                         list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
2183                         $t = strtolower( $t );
2184                         if ( $this->strict ) {
2185                                 // Verify that attributes are all properly double-quoted
2186                                 Assert::invariant(
2187                                         preg_match(
2188                                                 '/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
2189                                         ),
2190                                         "Bad attribute string found"
2191                                 );
2192                         }
2193                 } else {
2194                         Assert::invariant(
2195                                 !$this->strict, "< found which does not start a valid tag"
2196                         );
2197                         $slash = $t = $attribStr = $brace = $rest = null;
2198                 }
2199                 $goodTag = $t;
2200                 if ( $this->inRCDATA ) {
2201                         if ( $slash && $t === $this->inRCDATA ) {
2202                                 $this->inRCDATA = false;
2203                         } else {
2204                                 // No tags allowed; this emulates the "rcdata" tokenizer mode.
2205                                 $goodTag = false;
2206                         }
2207                 }
2208                 if ( $this->inRAWTEXT ) {
2209                         if ( $slash && $t === $this->inRAWTEXT ) {
2210                                 $this->inRAWTEXT = false;
2211                         } else {
2212                                 // No tags allowed, no entity-escaping done.
2213                                 $goodTag = false;
2214                         }
2215                 }
2216                 $sanitize = $this->allowedHtmlElements !== null;
2217                 if ( $sanitize ) {
2218                         $goodTag = $t && isset( $this->allowedHtmlElements[$t] );
2219                 }
2220                 if ( $goodTag ) {
2221                         if ( is_callable( $this->processingCallback ) ) {
2222                                 call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
2223                         }
2224                         if ( $sanitize ) {
2225                                 $goodTag = Sanitizer::validateTag( $attribStr, $t );
2226                         }
2227                 }
2228                 if ( $goodTag ) {
2229                         if ( $sanitize ) {
2230                                 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2231                                 $attribs = Sanitizer::validateTagAttributes( $attribs, $t );
2232                         } else {
2233                                 $attribs = Sanitizer::decodeTagAttributes( $attribStr );
2234                         }
2235                         $goodTag = $this->insertToken(
2236                                 $slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
2237                         );
2238                 }
2239                 if ( $goodTag ) {
2240                         $rest = str_replace( '>', '&gt;', $rest );
2241                         $this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
2242                 } elseif ( $this->inRAWTEXT ) {
2243                         $this->insertToken( 'text', "<$x" );
2244                 } else {
2245                         // bad tag; serialize entire thing as text.
2246                         $this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
2247                 }
2248         }
2249
2250         private function switchMode( $mode ) {
2251                 Assert::parameter(
2252                         substr( $mode, -4 ) === 'Mode', '$mode', 'should end in Mode'
2253                 );
2254                 $oldMode = $this->parseMode;
2255                 $this->parseMode = $mode;
2256                 return $oldMode;
2257         }
2258
2259         private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
2260                 $this->switchMode( $mode );
2261                 return $this->insertToken( $token, $value, $attribs, $selfClose );
2262         }
2263
2264         private function resetInsertionMode() {
2265                 $last = false;
2266                 foreach ( $this->stack as $i => $node ) {
2267                         if ( $i === 0 ) {
2268                                 $last = true;
2269                                 if ( $this->fragmentContext ) {
2270                                         $node = $this->fragmentContext;
2271                                 }
2272                         }
2273                         if ( $node->isHtml() ) {
2274                                 switch ( $node->localName ) {
2275                                 case 'select':
2276                                         $stackLength = $this->stack->length();
2277                                         for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
2278                                                 $ancestor = $this->stack->node( $stackLength - $j - 1 );
2279                                                 if ( $ancestor->isHtmlNamed( 'template' ) ) {
2280                                                         break;
2281                                                 }
2282                                                 if ( $ancestor->isHtmlNamed( 'table' ) ) {
2283                                                         $this->switchMode( 'inSelectInTableMode' );
2284                                                         return;
2285                                                 }
2286                                         }
2287                                         $this->switchMode( 'inSelectMode' );
2288                                         return;
2289                                 case 'tr':
2290                                         $this->switchMode( 'inRowMode' );
2291                                         return;
2292                                 case 'tbody':
2293                                 case 'tfoot':
2294                                 case 'thead':
2295                                         $this->switchMode( 'inTableBodyMode' );
2296                                         return;
2297                                 case 'caption':
2298                                         $this->switchMode( 'inCaptionMode' );
2299                                         return;
2300                                 case 'colgroup':
2301                                         $this->switchMode( 'inColumnGroupMode' );
2302                                         return;
2303                                 case 'table':
2304                                         $this->switchMode( 'inTableMode' );
2305                                         return;
2306                                 case 'template':
2307                                         $this->switchMode(
2308                                                 array_slice( $this->templateInsertionModes, -1 )[0]
2309                                         );
2310                                         return;
2311                                 case 'body':
2312                                         $this->switchMode( 'inBodyMode' );
2313                                         return;
2314                                 // OMITTED: <frameset>
2315                                 // OMITTED: <html>
2316                                 // OMITTED: <head>
2317                                 default:
2318                                         if ( !$last ) {
2319                                                 // OMITTED: <head>
2320                                                 if ( $node->isA( BalanceSets::$tableCellSet ) ) {
2321                                                         $this->switchMode( 'inCellMode' );
2322                                                         return;
2323                                                 }
2324                                         }
2325                                 }
2326                         }
2327                         if ( $last ) {
2328                                 $this->switchMode( 'inBodyMode' );
2329                                 return;
2330                         }
2331                 }
2332         }
2333
2334         private function stopParsing() {
2335                 // Most of the spec methods are inapplicable, other than step 2:
2336                 // "pop all the nodes off the stack of open elements".
2337                 // We're going to keep the top-most <html> element on the stack, though.
2338
2339                 // Clear the AFE list first, otherwise the element objects will stay live
2340                 // during serialization, potentially using O(N^2) memory. Note that
2341                 // popping the stack will never result in reconstructing the active
2342                 // formatting elements.
2343                 $this->afe = null;
2344                 $this->stack->popTo( 1 );
2345         }
2346
2347         private function parseRawText( $value, $attribs = null ) {
2348                 $this->stack->insertHTMLElement( $value, $attribs );
2349                 $this->inRAWTEXT = $value;
2350                 $this->originalInsertionMode = $this->switchMode( 'inTextMode' );
2351                 return true;
2352         }
2353
2354         private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
2355                 if ( $token === 'text' ) {
2356                         $this->stack->insertText( $value );
2357                         return true;
2358                 } elseif ( $token === 'eof' ) {
2359                         $this->stack->pop();
2360                         return $this->switchModeAndReprocess(
2361                                 $this->originalInsertionMode, $token, $value, $attribs, $selfClose
2362                         );
2363                 } elseif ( $token === 'endtag' ) {
2364                         $this->stack->pop();
2365                         $this->switchMode( $this->originalInsertionMode );
2366                         return true;
2367                 }
2368                 return true;
2369         }
2370
2371         private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
2372                 if ( $token === 'text' ) {
2373                         if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
2374                                 $this->stack->insertText( $matches[0] );
2375                                 $value = substr( $value, strlen( $matches[0] ) );
2376                         }
2377                         if ( strlen( $value ) === 0 ) {
2378                                 return true; // All text handled.
2379                         }
2380                         // Fall through to handle non-whitespace below.
2381                 } elseif ( $token === 'tag' ) {
2382                         switch ( $value ) {
2383                         case 'meta':
2384                                 // OMITTED: in a full HTML parser, this might change the encoding.
2385                                 // falls through
2386                         // OMITTED: <html>
2387                         case 'base':
2388                         case 'basefont':
2389                         case 'bgsound':
2390                         case 'link':
2391                                 $this->stack->insertHTMLElement( $value, $attribs );
2392                                 $this->stack->pop();
2393                                 return true;
2394                         // OMITTED: <title>
2395                         // OMITTED: <noscript>
2396                         case 'noframes':
2397                         case 'style':
2398                                 return $this->parseRawText( $value, $attribs );
2399                         // OMITTED: <script>
2400                         case 'template':
2401                                 $this->stack->insertHTMLElement( $value, $attribs );
2402                                 $this->afe->insertMarker();
2403                                 // OMITTED: frameset_ok
2404                                 $this->switchMode( 'inTemplateMode' );
2405                                 $this->templateInsertionModes[] = $this->parseMode;
2406                                 return true;
2407                         // OMITTED: <head>
2408                         }
2409                 } elseif ( $token === 'endtag' ) {
2410                         switch ( $value ) {
2411                         // OMITTED: <head>
2412                         // OMITTED: <body>
2413                         // OMITTED: <html>
2414                         case 'br':
2415                                 break; // handle at the bottom of the function
2416                         case 'template':
2417                                 if ( $this->stack->indexOf( $value ) < 0 ) {
2418                                         return true; // Ignore the token.
2419                                 }
2420                                 $this->stack->generateImpliedEndTags( null, true /* thorough */ );
2421                                 $this->stack->popTag( $value );
2422                                 $this->afe->clearToMarker();
2423                                 array_pop( $this->templateInsertionModes );
2424                                 $this->resetInsertionMode();
2425                                 return true;
2426                         default:
2427                                 // ignore any other end tag
2428                                 return true;
2429                         }
2430                 } elseif ( $token === 'comment' ) {
2431                         $this->stack->insertComment( $value );
2432                         return true;
2433                 }
2434
2435                 // If not handled above
2436                 $this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
2437                 // Then redo this one
2438                 return $this->insertToken( $token, $value, $attribs, $selfClose );
2439         }
2440
2441         private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
2442                 if ( $token === 'text' ) {
2443                         $this->afe->reconstruct( $this->stack );
2444                         $this->stack->insertText( $value );
2445                         return true;
2446                 } elseif ( $token === 'eof' ) {
2447                         if ( !empty( $this->templateInsertionModes ) ) {
2448                                 return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
2449                         }
2450                         $this->stopParsing();
2451                         return true;
2452                 } elseif ( $token === 'tag' ) {
2453                         switch ( $value ) {
2454                         // OMITTED: <html>
2455                         case 'base':
2456                         case 'basefont':
2457                         case 'bgsound':
2458                         case 'link':
2459                         case 'meta':
2460                         case 'noframes':
2461                         // OMITTED: <script>
2462                         case 'style':
2463                         case 'template':
2464                         // OMITTED: <title>
2465                                 return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2466                         // OMITTED: <body>
2467                         // OMITTED: <frameset>
2468
2469                         case 'address':
2470                         case 'article':
2471                         case 'aside':
2472                         case 'blockquote':
2473                         case 'center':
2474                         case 'details':
2475                         case 'dialog':
2476                         case 'dir':
2477                         case 'div':
2478                         case 'dl':
2479                         case 'fieldset':
2480                         case 'figcaption':
2481                         case 'figure':
2482                         case 'footer':
2483                         case 'header':
2484                         case 'hgroup':
2485                         case 'main':
2486                         case 'nav':
2487                         case 'ol':
2488                         case 'p':
2489                         case 'section':
2490                         case 'summary':
2491                         case 'ul':
2492                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2493                                         $this->inBodyMode( 'endtag', 'p' );
2494                                 }
2495                                 $this->stack->insertHTMLElement( $value, $attribs );
2496                                 return true;
2497
2498                         case 'menu':
2499                                 if ( $this->stack->inButtonScope( "p" ) ) {
2500                                         $this->inBodyMode( 'endtag', 'p' );
2501                                 }
2502                                 if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2503                                         $this->stack->pop();
2504                                 }
2505                                 $this->stack->insertHTMLElement( $value, $attribs );
2506                                 return true;
2507
2508                         case 'h1':
2509                         case 'h2':
2510                         case 'h3':
2511                         case 'h4':
2512                         case 'h5':
2513                         case 'h6':
2514                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2515                                         $this->inBodyMode( 'endtag', 'p' );
2516                                 }
2517                                 if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
2518                                         $this->stack->pop();
2519                                 }
2520                                 $this->stack->insertHTMLElement( $value, $attribs );
2521                                 return true;
2522
2523                         case 'pre':
2524                         case 'listing':
2525                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2526                                         $this->inBodyMode( 'endtag', 'p' );
2527                                 }
2528                                 $this->stack->insertHTMLElement( $value, $attribs );
2529                                 $this->ignoreLinefeed = true;
2530                                 // OMITTED: frameset_ok
2531                                 return true;
2532
2533                         case 'form':
2534                                 if (
2535                                         $this->formElementPointer &&
2536                                         $this->stack->indexOf( 'template' ) < 0
2537                                 ) {
2538                                         return true; // in a form, not in a template.
2539                                 }
2540                                 if ( $this->stack->inButtonScope( "p" ) ) {
2541                                         $this->inBodyMode( 'endtag', 'p' );
2542                                 }
2543                                 $elt = $this->stack->insertHTMLElement( $value, $attribs );
2544                                 if ( $this->stack->indexOf( 'template' ) < 0 ) {
2545                                         $this->formElementPointer = $elt;
2546                                 }
2547                                 return true;
2548
2549                         case 'li':
2550                                 // OMITTED: frameset_ok
2551                                 foreach ( $this->stack as $node ) {
2552                                         if ( $node->isHtmlNamed( 'li' ) ) {
2553                                                 $this->inBodyMode( 'endtag', 'li' );
2554                                                 break;
2555                                         }
2556                                         if (
2557                                                 $node->isA( BalanceSets::$specialSet ) &&
2558                                                 !$node->isA( BalanceSets::$addressDivPSet )
2559                                         ) {
2560                                                 break;
2561                                         }
2562                                 }
2563                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2564                                         $this->inBodyMode( 'endtag', 'p' );
2565                                 }
2566                                 $this->stack->insertHTMLElement( $value, $attribs );
2567                                 return true;
2568
2569                         case 'dd':
2570                         case 'dt':
2571                                 // OMITTED: frameset_ok
2572                                 foreach ( $this->stack as $node ) {
2573                                         if ( $node->isHtmlNamed( 'dd' ) ) {
2574                                                 $this->inBodyMode( 'endtag', 'dd' );
2575                                                 break;
2576                                         }
2577                                         if ( $node->isHtmlNamed( 'dt' ) ) {
2578                                                 $this->inBodyMode( 'endtag', 'dt' );
2579                                                 break;
2580                                         }
2581                                         if (
2582                                                 $node->isA( BalanceSets::$specialSet ) &&
2583                                                 !$node->isA( BalanceSets::$addressDivPSet )
2584                                         ) {
2585                                                 break;
2586                                         }
2587                                 }
2588                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2589                                         $this->inBodyMode( 'endtag', 'p' );
2590                                 }
2591                                 $this->stack->insertHTMLElement( $value, $attribs );
2592                                 return true;
2593
2594                         // OMITTED: <plaintext>
2595
2596                         case 'button':
2597                                 if ( $this->stack->inScope( 'button' ) ) {
2598                                         $this->inBodyMode( 'endtag', 'button' );
2599                                         return $this->insertToken( $token, $value, $attribs, $selfClose );
2600                                 }
2601                                 $this->afe->reconstruct( $this->stack );
2602                                 $this->stack->insertHTMLElement( $value, $attribs );
2603                                 return true;
2604
2605                         case 'a':
2606                                 $activeElement = $this->afe->findElementByTag( 'a' );
2607                                 if ( $activeElement ) {
2608                                         $this->inBodyMode( 'endtag', 'a' );
2609                                         if ( $this->afe->isInList( $activeElement ) ) {
2610                                                 $this->afe->remove( $activeElement );
2611                                                 // Don't flatten here, since when we fall
2612                                                 // through below we might foster parent
2613                                                 // the new <a> tag inside this one.
2614                                                 $this->stack->removeElement( $activeElement, false );
2615                                         }
2616                                 }
2617                                 // Falls through
2618                         case 'b':
2619                         case 'big':
2620                         case 'code':
2621                         case 'em':
2622                         case 'font':
2623                         case 'i':
2624                         case 's':
2625                         case 'small':
2626                         case 'strike':
2627                         case 'strong':
2628                         case 'tt':
2629                         case 'u':
2630                                 $this->afe->reconstruct( $this->stack );
2631                                 $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2632                                 return true;
2633
2634                         case 'nobr':
2635                                 $this->afe->reconstruct( $this->stack );
2636                                 if ( $this->stack->inScope( 'nobr' ) ) {
2637                                         $this->inBodyMode( 'endtag', 'nobr' );
2638                                         $this->afe->reconstruct( $this->stack );
2639                                 }
2640                                 $this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ) );
2641                                 return true;
2642
2643                         case 'applet':
2644                         case 'marquee':
2645                         case 'object':
2646                                 $this->afe->reconstruct( $this->stack );
2647                                 $this->stack->insertHTMLElement( $value, $attribs );
2648                                 $this->afe->insertMarker();
2649                                 // OMITTED: frameset_ok
2650                                 return true;
2651
2652                         case 'table':
2653                                 // The document is never in "quirks mode"; see simplifications
2654                                 // above.
2655                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2656                                         $this->inBodyMode( 'endtag', 'p' );
2657                                 }
2658                                 $this->stack->insertHTMLElement( $value, $attribs );
2659                                 // OMITTED: frameset_ok
2660                                 $this->switchMode( 'inTableMode' );
2661                                 return true;
2662
2663                         case 'area':
2664                         case 'br':
2665                         case 'embed':
2666                         case 'img':
2667                         case 'keygen':
2668                         case 'wbr':
2669                                 $this->afe->reconstruct( $this->stack );
2670                                 $this->stack->insertHTMLElement( $value, $attribs );
2671                                 $this->stack->pop();
2672                                 // OMITTED: frameset_ok
2673                                 return true;
2674
2675                         case 'input':
2676                                 $this->afe->reconstruct( $this->stack );
2677                                 $this->stack->insertHTMLElement( $value, $attribs );
2678                                 $this->stack->pop();
2679                                 // OMITTED: frameset_ok
2680                                 // (hence we don't need to examine the tag's "type" attribute)
2681                                 return true;
2682
2683                         case 'param':
2684                         case 'source':
2685                         case 'track':
2686                                 $this->stack->insertHTMLElement( $value, $attribs );
2687                                 $this->stack->pop();
2688                                 return true;
2689
2690                         case 'hr':
2691                                 if ( $this->stack->inButtonScope( 'p' ) ) {
2692                                         $this->inBodyMode( 'endtag', 'p' );
2693                                 }
2694                                 if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2695                                         $this->stack->pop();
2696                                 }
2697                                 $this->stack->insertHTMLElement( $value, $attribs );
2698                                 $this->stack->pop();
2699                                 return true;
2700
2701                         case 'image':
2702                                 // warts!
2703                                 return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
2704
2705                         case 'textarea':
2706                                 $this->stack->insertHTMLElement( $value, $attribs );
2707                                 $this->ignoreLinefeed = true;
2708                                 $this->inRCDATA = $value; // emulate rcdata tokenizer mode
2709                                 // OMITTED: frameset_ok
2710                                 return true;
2711
2712                         // OMITTED: <xmp>
2713                         // OMITTED: <iframe>
2714                         // OMITTED: <noembed>
2715                         // OMITTED: <noscript>
2716
2717                         case 'select':
2718                                 $this->afe->reconstruct( $this->stack );
2719                                 $this->stack->insertHTMLElement( $value, $attribs );
2720                                 switch ( $this->parseMode ) {
2721                                 case 'inTableMode':
2722                                 case 'inCaptionMode':
2723                                 case 'inTableBodyMode':
2724                                 case 'inRowMode':
2725                                 case 'inCellMode':
2726                                         $this->switchMode( 'inSelectInTableMode' );
2727                                         return true;
2728                                 default:
2729                                         $this->switchMode( 'inSelectMode' );
2730                                         return true;
2731                                 }
2732
2733                         case 'optgroup':
2734                         case 'option':
2735                                 if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
2736                                         $this->inBodyMode( 'endtag', 'option' );
2737                                 }
2738                                 $this->afe->reconstruct( $this->stack );
2739                                 $this->stack->insertHTMLElement( $value, $attribs );
2740                                 return true;
2741
2742                         case 'menuitem':
2743                                 if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
2744                                         $this->stack->pop();
2745                                 }
2746                                 $this->afe->reconstruct( $this->stack );
2747                                 $this->stack->insertHTMLElement( $value, $attribs );
2748                                 return true;
2749
2750                         case 'rb':
2751                         case 'rtc':
2752                                 if ( $this->stack->inScope( 'ruby' ) ) {
2753                                         $this->stack->generateImpliedEndTags();
2754                                 }
2755                                 $this->stack->insertHTMLElement( $value, $attribs );
2756                                 return true;
2757
2758                         case 'rp':
2759                         case 'rt':
2760                                 if ( $this->stack->inScope( 'ruby' ) ) {
2761                                         $this->stack->generateImpliedEndTags( 'rtc' );
2762                                 }
2763                                 $this->stack->insertHTMLElement( $value, $attribs );
2764                                 return true;
2765
2766                         case 'math':
2767                                 $this->afe->reconstruct( $this->stack );
2768                                 // We skip the spec's "adjust MathML attributes" and
2769                                 // "adjust foreign attributes" steps, since the browser will
2770                                 // do this later when it parses the output and it doesn't affect
2771                                 // balancing.
2772                                 $this->stack->insertForeignElement(
2773                                         BalanceSets::MATHML_NAMESPACE, $value, $attribs
2774                                 );
2775                                 if ( $selfClose ) {
2776                                         // emit explicit </math> tag.
2777                                         $this->stack->pop();
2778                                 }
2779                                 return true;
2780
2781                         case 'svg':
2782                                 $this->afe->reconstruct( $this->stack );
2783                                 // We skip the spec's "adjust SVG attributes" and
2784                                 // "adjust foreign attributes" steps, since the browser will
2785                                 // do this later when it parses the output and it doesn't affect
2786                                 // balancing.
2787                                 $this->stack->insertForeignElement(
2788                                         BalanceSets::SVG_NAMESPACE, $value, $attribs
2789                                 );
2790                                 if ( $selfClose ) {
2791                                         // emit explicit </svg> tag.
2792                                         $this->stack->pop();
2793                                 }
2794                                 return true;
2795
2796                         case 'caption':
2797                         case 'col':
2798                         case 'colgroup':
2799                         // OMITTED: <frame>
2800                         case 'head':
2801                         case 'tbody':
2802                         case 'td':
2803                         case 'tfoot':
2804                         case 'th':
2805                         case 'thead':
2806                         case 'tr':
2807                                 // Ignore table tags if we're not inTableMode
2808                                 return true;
2809                         }
2810
2811                         // Handle any other start tag here
2812                         $this->afe->reconstruct( $this->stack );
2813                         $this->stack->insertHTMLElement( $value, $attribs );
2814                         return true;
2815                 } elseif ( $token === 'endtag' ) {
2816                         switch ( $value ) {
2817                         // </body>,</html> are unsupported.
2818
2819                         case 'template':
2820                                 return $this->inHeadMode( $token, $value, $attribs, $selfClose );
2821
2822                         case 'address':
2823                         case 'article':
2824                         case 'aside':
2825                         case 'blockquote':
2826                         case 'button':
2827                         case 'center':
2828                         case 'details':
2829                         case 'dialog':
2830                         case 'dir':
2831                         case 'div':
2832                         case 'dl':
2833                         case 'fieldset':
2834                         case 'figcaption':
2835                         case 'figure':
2836                         case 'footer':
2837                         case 'header':
2838                         case 'hgroup':
2839                         case 'listing':
2840                         case 'main':
2841                         case 'menu':
2842                         case 'nav':
2843                         case 'ol':
2844                         case 'pre':
2845                         case 'section':
2846                         case 'summary':
2847                         case 'ul':
2848                                 // Ignore if there is not a matching open tag
2849                                 if ( !$this->stack->inScope( $value ) ) {
2850                                         return true;
2851                                 }
2852                                 $this->stack->generateImpliedEndTags();
2853                                 $this->stack->popTag( $value );
2854                                 return true;
2855
2856                         case 'form':
2857                                 if ( $this->stack->indexOf( 'template' ) < 0 ) {
2858                                         $openform = $this->formElementPointer;
2859                                         $this->formElementPointer = null;
2860                                         if ( !$openform || !$this->stack->inScope( $openform ) ) {
2861                                                 return true;
2862                                         }
2863                                         $this->stack->generateImpliedEndTags();
2864                                         // Don't flatten yet if we're removing a <form> element
2865                                         // out-of-order. (eg. `<form><div></form>`)
2866                                         $flatten = ( $this->stack->currentNode === $openform );
2867                                         $this->stack->removeElement( $openform, $flatten );
2868                                 } else {
2869                                         if ( !$this->stack->inScope( 'form' ) ) {
2870                                                 return true;
2871                                         }
2872                                         $this->stack->generateImpliedEndTags();
2873                                         $this->stack->popTag( 'form' );
2874                                 }
2875                                 return true;
2876
2877                         case 'p':
2878                                 if ( !$this->stack->inButtonScope( 'p' ) ) {
2879                                         $this->inBodyMode( 'tag', 'p', [] );
2880                                         return $this->insertToken( $token, $value, $attribs, $selfClose );
2881                                 }
2882                                 $this->stack->generateImpliedEndTags( $value );
2883                                 $this->stack->popTag( $value );
2884                                 return true;
2885
2886                         case 'li':
2887                                 if ( !$this->stack->inListItemScope( $value ) ) {
2888                                         return true; // ignore
2889                                 }
2890                                 $this->stack->generateImpliedEndTags( $value );
2891                                 $this->stack->popTag( $value );
2892                                 return true;
2893
2894                         case 'dd':
2895                         case 'dt':
2896                                 if ( !$this->stack->inScope( $value ) ) {
2897                                         return true; // ignore
2898                                 }
2899                                 $this->stack->generateImpliedEndTags( $value );
2900                                 $this->stack->popTag( $value );
2901                                 return true;
2902
2903                         case 'h1':
2904                         case 'h2':
2905                         case 'h3':
2906                         case 'h4':
2907                         case 'h5':
2908                         case 'h6':
2909                                 if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
2910                                         return true; // ignore
2911                                 }
2912                                 $this->stack->generateImpliedEndTags();
2913                                 $this->stack->popTag( BalanceSets::$headingSet );
2914                                 return true;
2915
2916                         case 'sarcasm':
2917                                 // Take a deep breath, then:
2918                                 break;
2919
2920                         case 'a':
2921                         case 'b':
2922                         case 'big':
2923                         case 'code':
2924                         case 'em':
2925                         case 'font':
2926                         case 'i':
2927                         case 'nobr':
2928                         case 's':
2929                         case 'small':
2930                         case 'strike':
2931                         case 'strong':
2932                         case 'tt':
2933                         case 'u':
2934                                 if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
2935                                         return true; // If we did something, we're done.
2936                                 }
2937                                 break; // Go to the "any other end tag" case.
2938
2939                         case 'applet':
2940                         case 'marquee':
2941                         case 'object':
2942                                 if ( !$this->stack->inScope( $value ) ) {
2943                                         return true; // ignore
2944                                 }
2945                                 $this->stack->generateImpliedEndTags();
2946                                 $this->stack->popTag( $value );
2947                                 $this->afe->clearToMarker();
2948                                 return true;
2949
2950                         case 'br':
2951                                 // Turn </br> into <br>
2952                                 return $this->inBodyMode( 'tag', $value, [] );
2953                         }
2954
2955                         // Any other end tag goes here
2956                         foreach ( $this->stack as $i => $node ) {
2957                                 if ( $node->isHtmlNamed( $value ) ) {
2958                                         $this->stack->generateImpliedEndTags( $value );
2959                                         $this->stack->popTo( $i ); // including $i
2960                                         break;
2961                                 } elseif ( $node->isA( BalanceSets::$specialSet ) ) {
2962                                         return true; // ignore this close token.
2963                                 }
2964                         }
2965                         return true;
2966                 } elseif ( $token === 'comment' ) {
2967                         $this->stack->insertComment( $value );
2968                         return true;
2969                 } else {
2970                         Assert::invariant( false, "Bad token type: $token" );
2971                 }
2972         }
2973
/**
 * Process one token using the rules of the HTML5 "in table" insertion
 * mode.
 *
 * Table-structure tags are handled directly. Every token that is not
 * handled here is passed to inBodyMode() while the stack's
 * fosterParentMode flag is set.
 *
 * @param string $token Token type: 'text', 'eof', 'tag', 'endtag'
 *   or 'comment'
 * @param string $value Text, comment contents, or the tag name
 * @param array|null $attribs Attributes of a start tag (only the
 *   'type' attribute of <input> is examined here)
 * @param bool $selfClose Passed through unchanged to the handlers
 *   this method delegates to
 * @return bool True when the token was handled or ignored here;
 *   otherwise whatever the handler it was delegated to returns
 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
 */
private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
        if ( $token === 'text' ) {
                if ( $this->textIntegrationMode ) {
                        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
                } elseif ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
                        // Start with an empty buffer and remember the mode to
                        // return to, then let inTableTextMode collect the text.
                        $this->pendingTableText = '';
                        $this->originalInsertionMode = $this->parseMode;
                        return $this->switchModeAndReprocess( 'inTableTextMode',
                                $token, $value, $attribs, $selfClose );
                }
                // fall through to default case.
        } elseif ( $token === 'eof' ) {
                $this->stopParsing();
                return true;
        } elseif ( $token === 'tag' ) {
                switch ( $value ) {
                case 'caption':
                        // Clear the stack back to a table context first, as the
                        // spec requires and as the <colgroup> and <tbody> cases
                        // below already do. Without this, an element left open
                        // by the "anything else" path (for example the <b> in
                        // `<table><b><caption>`) would still be the current node
                        // and would become the caption's parent.
                        $this->stack->clearToContext( BalanceSets::$tableContextSet );
                        $this->afe->insertMarker();
                        $this->stack->insertHTMLElement( $value, $attribs );
                        $this->switchMode( 'inCaptionMode' );
                        return true;
                case 'colgroup':
                        $this->stack->clearToContext( BalanceSets::$tableContextSet );
                        $this->stack->insertHTMLElement( $value, $attribs );
                        $this->switchMode( 'inColumnGroupMode' );
                        return true;
                case 'col':
                        // Act as if an attribute-less <colgroup> start tag had
                        // been seen, then hand the <col> token to insertToken().
                        $this->inTableMode( 'tag', 'colgroup', [] );
                        return $this->insertToken( $token, $value, $attribs, $selfClose );
                case 'tbody':
                case 'tfoot':
                case 'thead':
                        $this->stack->clearToContext( BalanceSets::$tableContextSet );
                        $this->stack->insertHTMLElement( $value, $attribs );
                        $this->switchMode( 'inTableBodyMode' );
                        return true;
                case 'td':
                case 'th':
                case 'tr':
                        // Act as if an attribute-less <tbody> start tag had been
                        // seen, then hand the original token to insertToken().
                        $this->inTableMode( 'tag', 'tbody', [] );
                        return $this->insertToken( $token, $value, $attribs, $selfClose );
                case 'table':
                        if ( !$this->stack->inTableScope( $value ) ) {
                                return true; // Ignore this tag.
                        }
                        // A <table> start tag inside an open table is treated as
                        // </table> followed by the same start tag again.
                        $this->inTableMode( 'endtag', $value );
                        return $this->insertToken( $token, $value, $attribs, $selfClose );

                case 'style':
                // OMITTED: <script>
                case 'template':
                        return $this->inHeadMode( $token, $value, $attribs, $selfClose );

                case 'input':
                        // Only <input type="hidden"> (compared case-insensitively)
                        // is inserted in place; any other <input> is handled by
                        // the "anything else" code at the end of this method.
                        if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
                                break; // Handle this as "everything else"
                        }
                        $this->stack->insertHTMLElement( $value, $attribs );
                        $this->stack->pop();
                        return true;

                case 'form':
                        if (
                                $this->formElementPointer ||
                                $this->stack->indexOf( 'template' ) >= 0
                        ) {
                                return true; // ignore this token
                        }
                        // Insert the <form>, remember it as the form element
                        // pointer, and take it straight back off the stack.
                        $this->formElementPointer =
                                $this->stack->insertHTMLElement( $value, $attribs );
                        $this->stack->popTag( $this->formElementPointer );
                        return true;
                }
                // Fall through for "anything else" clause.
        } elseif ( $token === 'endtag' ) {
                switch ( $value ) {
                case 'table':
                        if ( !$this->stack->inTableScope( $value ) ) {
                                return true; // Ignore.
                        }
                        $this->stack->popTag( $value );
                        $this->resetInsertionMode();
                        return true;
                // OMITTED: <body>
                case 'caption':
                case 'col':
                case 'colgroup':
                // OMITTED: <html>
                case 'tbody':
                case 'td':
                case 'tfoot':
                case 'th':
                case 'thead':
                case 'tr':
                        return true; // Ignore the token.
                case 'template':
                        return $this->inHeadMode( $token, $value, $attribs, $selfClose );
                }
                // Fall through for "anything else" clause.
        } elseif ( $token === 'comment' ) {
                $this->stack->insertComment( $value );
                return true;
        }
        // This is the "anything else" case: process the token with the
        // "in body" rules while the stack's fosterParentMode flag is set.
        $this->stack->fosterParentMode = true;
        $this->inBodyMode( $token, $value, $attribs, $selfClose );
        $this->stack->fosterParentMode = false;
        return true;
}
3083
/**
 * Process one token using the rules of the HTML5 "in table text"
 * insertion mode.
 *
 * Text tokens are appended to the pending table text buffer. The first
 * token of any other type flushes that buffer and is then reprocessed
 * in the insertion mode that was saved in originalInsertionMode.
 *
 * @param string $token Token type
 * @param string $value Text to buffer, or the value of the token to
 *   reprocess
 * @param array|null $attribs Passed through with the reprocessed token
 * @param bool $selfClose Passed through with the reprocessed token
 * @return bool True after buffering text; otherwise the result of
 *   switchModeAndReprocess()
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
        if ( $token !== 'text' ) {
                // Take the buffered text and leave the buffer empty.
                $buffered = $this->pendingTableText;
                $this->pendingTableText = '';

                // True unless the buffer holds a character other than tab,
                // LF, FF, CR or space.
                $whitespaceOnly = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
                if ( $whitespaceOnly ) {
                        $this->stack->insertText( $buffered );
                } else {
                        // Same treatment as the "anything else" case of
                        // inTableMode: "in body" rules with the stack's
                        // fosterParentMode flag set.
                        $this->stack->fosterParentMode = true;
                        $this->inBodyMode( 'text', $buffered );
                        $this->stack->fosterParentMode = false;
                }

                return $this->switchModeAndReprocess(
                        $this->originalInsertionMode, $token, $value, $attribs, $selfClose
                );
        }

        // Still inside a run of text: keep accumulating.
        $this->pendingTableText .= $value;
        return true;
}
3105
/**
 * Helper for inCaptionMode: close the currently open <caption>.
 *
 * When a caption is in table scope, this generates implied end tags,
 * pops the caption, clears the active formatting elements to the last
 * marker and switches to inTableMode.
 *
 * @return bool False when no caption is in table scope (nothing is
 *   changed in that case), true once the caption has been closed
 */
private function endCaption() {
        if ( $this->stack->inTableScope( 'caption' ) ) {
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( 'caption' );
                $this->afe->clearToMarker();
                $this->switchMode( 'inTableMode' );
                return true;
        }
        return false;
}
3117
/**
 * Process one token using the rules of the HTML5 "in caption"
 * insertion mode.
 *
 * Start tags for table structure elements and the </table> end tag
 * close the caption through endCaption() and are then passed to
 * insertToken(). </caption> just closes the caption. A fixed list of
 * other table-related end tags is ignored. Everything else is handled
 * by inBodyMode().
 *
 * @param string $token Token type
 * @param string $value Tag name for 'tag' and 'endtag' tokens
 * @param array|null $attribs Passed through with the token
 * @param bool $selfClose Passed through with the token
 * @return bool True when handled or ignored here; otherwise the
 *   result of inBodyMode()
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
        // Set when the token closes the caption and must then be passed
        // to insertToken().
        $closeThenReinsert = false;

        if ( $token === 'tag' ) {
                switch ( $value ) {
                case 'caption':
                case 'col':
                case 'colgroup':
                case 'tbody':
                case 'td':
                case 'tfoot':
                case 'th':
                case 'thead':
                case 'tr':
                        $closeThenReinsert = true;
                        break;
                }
        } elseif ( $token === 'endtag' ) {
                switch ( $value ) {
                case 'caption':
                        $this->endCaption();
                        return true;
                case 'table':
                        $closeThenReinsert = true;
                        break;
                case 'body':
                case 'col':
                case 'colgroup':
                // OMITTED: <html>
                case 'tbody':
                case 'td':
                case 'tfoot':
                case 'th':
                case 'thead':
                case 'tr':
                        // These end tags are ignored here.
                        return true;
                }
        }

        if ( $closeThenReinsert ) {
                // The token is dropped when endCaption() reports that no
                // caption was in table scope.
                if ( $this->endCaption() ) {
                        $this->insertToken( $token, $value, $attribs, $selfClose );
                }
                return true;
        }

        // The "anything else" case
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3164
/**
 * Handler for the "in column group" insertion mode.
 *
 * Leading whitespace of a text token is inserted directly; <col> is
 * inserted and immediately popped; </colgroup> closes the column group;
 * template tags go to inHeadMode(); eof goes to inBodyMode(); comments are
 * inserted. Any other token (including the non-whitespace remainder of a
 * text token) closes an open colgroup and is reprocessed, or is ignored
 * when the current node is not a colgroup.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param mixed $value Text, comment body or tag name, depending on $token
 * @param array|null $attribs Attributes for a start tag
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of the handler it was delegated to
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    }

    // Tag names below are compared loosely (==), the same comparison a
    // switch statement applies to its case labels.
    if ( $token === 'text' ) {
        if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
            // Insert the whitespace prefix and keep only what follows it.
            $whitespace = $leading[0];
            $this->stack->insertText( $whitespace );
            $value = substr( $value, strlen( $whitespace ) );
        }
        if ( strlen( $value ) === 0 ) {
            return true; // All text handled.
        }
        // The non-whitespace remainder is handled as "anything else".
    } elseif ( $token === 'tag' ) {
        // OMITTED: <html>
        if ( $value == 'col' ) {
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->stack->pop();
            return true;
        }
        if ( $value == 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value == 'colgroup' ) {
            if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
                $this->stack->pop();
                $this->switchMode( 'inTableMode' );
            }
            // Otherwise the token is simply ignored.
            return true;
        }
        if ( $value == 'col' ) {
            return true; // Ignore the token.
        }
        if ( $value == 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    }

    // Anything else: close the colgroup (as if </colgroup> had been seen)
    // and reprocess, provided a colgroup is the current node.
    if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
        $this->inColumnGroupMode( 'endtag', 'colgroup' );
        return $this->insertToken( $token, $value, $attribs, $selfClose );
    }
    return true; // Ignore the token.
}
3215
3216         // Helper function for inTableBodyMode
/**
 * Close the current table section (tbody, thead or tfoot).
 *
 * If one of those elements is in table scope, the stack is cleared back to
 * the table body context, the current node is popped and the mode becomes
 * 'inTableMode'.
 *
 * @return bool True if a section was closed, false if none of the three
 *   elements was in table scope
 */
private function endSection() {
    // Probe in the order tbody, thead, tfoot and stop at the first hit.
    $sectionInScope = false;
    foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionName ) {
        if ( $this->stack->inTableScope( $sectionName ) ) {
            $sectionInScope = true;
            break;
        }
    }
    if ( !$sectionInScope ) {
        return false;
    }

    $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
    $this->stack->pop();
    $this->switchMode( 'inTableMode' );
    return true;
}
/**
 * Handler for the "in table body" insertion mode.
 *
 * <tr> opens a row; <td>/<th> first synthesize a <tr> and are then
 * reprocessed; tags that start another table part (and </table>) close the
 * section and are reprocessed; section end tags close the section; a fixed
 * set of stray end tags is ignored. Everything else goes to inTableMode().
 *
 * @param string $token Token type ('tag' and 'endtag' get special handling)
 * @param mixed $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes for a start tag
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of inTableMode()
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
    // Tag names are compared loosely (== / non-strict in_array()), which is
    // the same comparison a switch statement applies to its case labels.
    if ( $token === 'tag' ) {
        if ( $value == 'tr' ) {
            $this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->switchMode( 'inRowMode' );
            return true;
        }
        if ( $value == 'th' || $value == 'td' ) {
            // A cell without a row: open an attribute-less <tr> first,
            // then let the cell tag be processed again.
            $this->inTableBodyMode( 'tag', 'tr', [] );
            $this->insertToken( $token, $value, $attribs, $selfClose );
            return true;
        }
        $startsOtherPart = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead' ];
        if ( in_array( $value, $startsOtherPart ) ) {
            // Reprocess only if a section was actually closed.
            if ( $this->endSection() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value == 'table' ) {
            if ( $this->endSection() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ] ) ) {
            // Only act when the named section itself is in table scope.
            if ( $this->stack->inTableScope( $value ) ) {
                $this->endSection();
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        $ignoredEndTags = [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ];
        if ( in_array( $value, $ignoredEndTags ) ) {
            return true; // Ignore the token.
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3282
3283         // Helper function for inRowMode
/**
 * Close the current table row.
 *
 * If a 'tr' is in table scope, the stack is cleared back to the table row
 * context, the current node is popped and the mode becomes
 * 'inTableBodyMode'.
 *
 * @return bool True if a row was closed, false if no 'tr' was in table scope
 */
private function endRow() {
    if ( $this->stack->inTableScope( 'tr' ) ) {
        $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
        $this->stack->pop();
        $this->switchMode( 'inTableBodyMode' );
        return true;
    }
    return false;
}
/**
 * Handler for the "in row" insertion mode.
 *
 * <td>/<th> open a cell; tags that start another table part (and </table>)
 * close the row and are reprocessed; </tr> closes the row; section end
 * tags close the row and are reprocessed when that section is in table
 * scope; a fixed set of stray end tags is ignored. Everything else goes to
 * inTableMode().
 *
 * @param string $token Token type ('tag' and 'endtag' get special handling)
 * @param mixed $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes for a start tag
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of inTableMode()
 */
private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
    // Tag names are compared loosely (== / non-strict in_array()), which is
    // the same comparison a switch statement applies to its case labels.
    if ( $token === 'tag' ) {
        if ( $value == 'th' || $value == 'td' ) {
            $this->stack->clearToContext( BalanceSets::$tableRowContextSet );
            $this->stack->insertHTMLElement( $value, $attribs );
            $this->switchMode( 'inCellMode' );
            // Mark the start of the cell in the active formatting elements.
            $this->afe->insertMarker();
            return true;
        }
        $startsOtherPart = [
            'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr',
        ];
        if ( in_array( $value, $startsOtherPart ) ) {
            // Reprocess only if a row was actually closed.
            if ( $this->endRow() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value == 'tr' ) {
            $this->endRow();
            return true;
        }
        if ( $value == 'table' ) {
            if ( $this->endRow() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
        if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ] ) ) {
            // The section must be in table scope, and a row must have been
            // closed, before the end tag is processed again.
            if ( $this->stack->inTableScope( $value ) ) {
                if ( $this->endRow() ) {
                    $this->insertToken( $token, $value, $attribs, $selfClose );
                }
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        $ignoredEndTags = [ 'caption', 'col', 'colgroup', 'td', 'th' ];
        if ( in_array( $value, $ignoredEndTags ) ) {
            return true; // Ignore the token.
        }
    }

    // Anything else:
    return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3348
3349         // Helper for inCellMode
/**
 * Close the currently open table cell.
 *
 * Looks for a 'td' in table scope first, then for a 'th'; the first one
 * found is closed by feeding the matching end tag to inCellMode().
 *
 * @return bool True if a cell end tag was issued, false if neither
 *   element was in table scope
 */
private function endCell() {
    foreach ( [ 'td', 'th' ] as $cellName ) {
        if ( $this->stack->inTableScope( $cellName ) ) {
            $this->inCellMode( 'endtag', $cellName );
            return true;
        }
    }
    return false;
}
/**
 * Handler for the "in cell" insertion mode.
 *
 * Table-structure start tags close the cell and are reprocessed;
 * </td>/</th> close the cell; </caption>, </col> and </colgroup> are
 * ignored; end tags of enclosing table parts close the cell and are
 * reprocessed when that part is in table scope. Everything else goes to
 * inBodyMode().
 *
 * @param string $token Token type ('tag' and 'endtag' get special handling)
 * @param mixed $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Passed through to the delegated handlers
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of inBodyMode()
 */
private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
    // Tag names are compared loosely (== / non-strict in_array()), which is
    // the same comparison a switch statement applies to its case labels.
    if ( $token === 'tag' ) {
        $closesCell = [
            'caption', 'col', 'colgroup', 'tbody', 'td',
            'tfoot', 'th', 'thead', 'tr',
        ];
        if ( in_array( $value, $closesCell ) ) {
            // Reprocess only if a cell was actually closed.
            if ( $this->endCell() ) {
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value == 'td' || $value == 'th' ) {
            if ( $this->stack->inTableScope( $value ) ) {
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( $value );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
            }
            return true;
        }
        // OMITTED: <body>
        // OMITTED: <html>
        if ( in_array( $value, [ 'caption', 'col', 'colgroup' ] ) ) {
            return true;
        }
        $enclosingParts = [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ];
        if ( in_array( $value, $enclosingParts ) ) {
            if ( $this->stack->inTableScope( $value ) ) {
                // Close whichever cell is open, then let the end tag be
                // processed again in row mode.
                $this->stack->generateImpliedEndTags();
                $this->stack->popTag( BalanceSets::$tableCellSet );
                $this->afe->clearToMarker();
                $this->switchMode( 'inRowMode' );
                $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true;
        }
    }

    // Anything else:
    return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3414
/**
 * Handler for the "in select" insertion mode.
 *
 * Text and comments are inserted; eof goes to inBodyMode(); option and
 * optgroup tags open/close elements at the top of the stack; a nested
 * <select> is treated like </select>; <input>, <keygen> and <textarea>
 * close the select and are reprocessed; script/template tags go to
 * inHeadMode(). Every other token is ignored.
 *
 * @param string $token Token type ('text', 'eof', 'tag', 'endtag', 'comment')
 * @param mixed $value Text, comment body or tag name, depending on $token
 * @param array|null $attribs Attributes for a start tag
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of the handler it was delegated to
 */
private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' ) {
        $this->stack->insertText( $value );
        return true;
    }
    if ( $token === 'comment' ) {
        $this->stack->insertComment( $value );
        return true;
    }
    if ( $token === 'eof' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    }

    // Tag names are compared loosely (== / non-strict in_array()), which is
    // the same comparison a switch statement applies to its case labels.
    if ( $token === 'tag' ) {
        // OMITTED: <html>
        if ( $value == 'option' ) {
            // A new option implicitly closes the previous one.
            if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                $this->stack->pop();
            }
            $this->stack->insertHTMLElement( $value, $attribs );
            return true;
        }
        if ( $value == 'optgroup' ) {
            // A new optgroup implicitly closes an open option and then
            // an open optgroup.
            if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                $this->stack->pop();
            }
            if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
                $this->stack->pop();
            }
            $this->stack->insertHTMLElement( $value, $attribs );
            return true;
        }
        if ( $value == 'select' ) {
            $this->inSelectMode( 'endtag', $value ); // treat it like endtag
            return true;
        }
        if ( in_array( $value, [ 'input', 'keygen', 'textarea' ] ) ) {
            if ( $this->stack->inSelectScope( 'select' ) ) {
                $this->inSelectMode( 'endtag', 'select' );
                return $this->insertToken( $token, $value, $attribs, $selfClose );
            }
            return true; // ignore token (fragment case)
        }
        if ( in_array( $value, [ 'script', 'template' ] ) ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    } elseif ( $token === 'endtag' ) {
        if ( $value == 'optgroup' ) {
            // An option sitting directly on top of an optgroup is closed
            // together with that optgroup.
            $optionOnTop = $this->stack->currentNode->isHtmlNamed( 'option' );
            if ( $optionOnTop && $this->stack->length() >= 2 ) {
                $beneath = $this->stack->node( $this->stack->length() - 2 );
                if ( $beneath->isHtmlNamed( 'optgroup' ) ) {
                    $this->stack->pop();
                }
            }
            if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
                $this->stack->pop();
            }
            return true;
        }
        if ( $value == 'option' ) {
            if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
                $this->stack->pop();
            }
            return true;
        }
        if ( $value == 'select' ) {
            if ( $this->stack->inSelectScope( $value ) ) {
                $this->stack->popTag( $value );
                $this->resetInsertionMode();
            }
            // Otherwise: fragment case, nothing to close.
            return true;
        }
        if ( $value == 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
    }

    // anything else: just ignore the token
    return true;
}
3490
/**
 * Handler for the "in select in table" insertion mode.
 *
 * A start tag for a table part closes the select (by issuing a 'select'
 * end tag) and is reprocessed. An end tag for a table part does the same,
 * but only when that element is in table scope; otherwise it is ignored.
 * All other tokens are handled by inSelectMode().
 *
 * @param string $token Token type ('tag' and 'endtag' get special handling)
 * @param mixed $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Passed through to the delegated handlers
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when an end tag was ignored, otherwise the result of
 *   insertToken() or inSelectMode()
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
    // Non-strict in_array() compares with ==, the same comparison a switch
    // statement applies to its case labels.
    $tableParts = [
        'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th',
    ];
    $namesTablePart = in_array( $value, $tableParts );

    if ( $namesTablePart && $token === 'tag' ) {
        $this->inSelectInTableMode( 'endtag', 'select' );
        return $this->insertToken( $token, $value, $attribs, $selfClose );
    }

    if ( $namesTablePart && $token === 'endtag' ) {
        if ( !$this->stack->inTableScope( $value ) ) {
            return true;
        }
        $this->inSelectInTableMode( 'endtag', 'select' );
        return $this->insertToken( $token, $value, $attribs, $selfClose );
    }

    // anything else
    return $this->inSelectMode( $token, $value, $attribs, $selfClose );
}
3515
/**
 * Handler for the "in template" insertion mode.
 *
 * Text and comments go to inBodyMode(). At eof an open template is popped
 * and the token reprocessed, or parsing stops when no template is open.
 * Start tags are routed by name: head-level tags to inHeadMode(), table
 * related tags to the matching table mode, everything else to
 * 'inBodyMode'. The only end tag acted upon is </template>.
 *
 * @param string $token Token type ('text', 'comment', 'eof', 'tag', 'endtag');
 *   any other value trips an invariant assertion
 * @param mixed $value Text, comment body or tag name, depending on $token
 * @param array|null $attribs Passed through to the delegated handlers
 * @param bool $selfClose Passed through to the delegated handlers
 * @return mixed True when the token was consumed or ignored here,
 *   otherwise the result of the handler it was delegated to
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
    if ( $token === 'text' || $token === 'comment' ) {
        return $this->inBodyMode( $token, $value, $attribs, $selfClose );
    }

    if ( $token === 'eof' ) {
        if ( $this->stack->indexOf( 'template' ) < 0 ) {
            $this->stopParsing();
            return true;
        }
        // Close the open template, drop its insertion mode, and let the
        // eof token be processed again in whatever mode results.
        $this->stack->popTag( 'template' );
        $this->afe->clearToMarker();
        array_pop( $this->templateInsertionModes );
        $this->resetInsertionMode();
        $this->insertToken( $token, $value, $attribs, $selfClose );
        return true;
    }

    // Tag names are compared loosely (== / non-strict in_array()), which is
    // the same comparison a switch statement applies to its case labels.
    if ( $token === 'tag' ) {
        // OMITTED: <script>
        // OMITTED: <title>
        $headLevelTags = [
            'base', 'basefont', 'bgsound', 'link', 'meta',
            'noframes', 'style', 'template',
        ];
        if ( in_array( $value, $headLevelTags ) ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }

        // Pick the mode in which this start tag has to be reprocessed.
        $tableLevelTags = [ 'caption', 'colgroup', 'tbody', 'tfoot', 'thead' ];
        if ( in_array( $value, $tableLevelTags ) ) {
            $nextMode = 'inTableMode';
        } elseif ( $value == 'col' ) {
            $nextMode = 'inColumnGroupMode';
        } elseif ( $value == 'tr' ) {
            $nextMode = 'inTableBodyMode';
        } elseif ( $value == 'td' || $value == 'th' ) {
            $nextMode = 'inRowMode';
        } else {
            $nextMode = 'inBodyMode';
        }
        return $this->switchModeAndReprocess(
            $nextMode, $token, $value, $attribs, $selfClose
        );
    }

    if ( $token === 'endtag' ) {
        if ( $value == 'template' ) {
            return $this->inHeadMode( $token, $value, $attribs, $selfClose );
        }
        // Any other end tag is ignored.
        return true;
    }

    // No known token type matched.
    Assert::invariant( false, "Bad token type: $token" );
}
3582 }