true,
'tbody' => true,
'tfoot' => true,
'thead' => true,
'tr' => true
];
/**
 * Names of the elements that generateImpliedEndTags() will pop while one
 * of them is the current node. Only the keys are significant: membership
 * is tested with isset(), the values are always true.
 */
private static $impliedEndTags = [
'dd' => true,
'dt' => true,
'li' => true,
'option' => true,
'optgroup' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
];
/**
 * Names of the elements that generateImpliedEndTagsThoroughly() will pop
 * while one of them is the current node. This is the $impliedEndTags set
 * plus the table-related names (caption, colgroup, tbody, td, tfoot, th,
 * thead, tr). Only the keys are significant: membership is tested with
 * isset().
 */
private static $thoroughlyImpliedEndTags = [
'caption' => true,
'colgroup' => true,
'dd' => true,
'dt' => true,
'li' => true,
'optgroup' => true,
'option' => true,
'p' => true,
'rb' => true,
'rp' => true,
'rt' => true,
'rtc' => true,
'tbody' => true,
'td' => true,
'tfoot' => true,
'th' => true,
'thead' => true,
'tr' => true
];
/**
 * Store the handler, create an empty list of active formatting elements
 * and an empty stack of open elements, and read the option flags.
 *
 * @param TreeHandler $handler The object which receives tree events
 * @param array $options Optional overrides, keyed by option name. Any key
 *   which is omitted takes the default shown here:
 *   - isIframeSrcdoc: false
 *   - scriptingFlag: true
 *   - ignoreErrors: false. When true, error() and checkUnclosed() report
 *     nothing to the handler.
 *   - ignoreNulls: false
 *   - scopeCache: true. When true the stack of open elements is a
 *     CachingStack, otherwise a SimpleStack.
 */
public function __construct( TreeHandler $handler, $options = [] ) {
	$this->handler = $handler;
	$this->afe = new ActiveFormattingElements;

	$defaults = [
		'isIframeSrcdoc' => false,
		'scriptingFlag' => true,
		'ignoreErrors' => false,
		'ignoreNulls' => false,
		'scopeCache' => true,
	];
	// Array union: caller-supplied keys take precedence over the defaults
	$settings = $options + $defaults;

	$this->isIframeSrcdoc = $settings['isIframeSrcdoc'];
	$this->scriptingFlag = $settings['scriptingFlag'];
	$this->ignoreErrors = $settings['ignoreErrors'];
	$this->ignoreNulls = $settings['ignoreNulls'];

	$this->stack = $settings['scopeCache'] ? new CachingStack : new SimpleStack;
}
/**
 * Magic setter. PHP calls this for writes to properties which are
 * inaccessible or not declared; every such write is forwarded to
 * PropGuard::set().
 * NOTE(review): PropGuard is defined elsewhere -- presumably it rejects
 * writes to unknown properties; confirm against its implementation.
 *
 * @param string $name The property name
 * @param mixed $value The value being assigned
 */
public function __set( $name, $value ) {
PropGuard::set( $this, $name, $value );
}
/**
 * Begin a parse. Registers a callback with the tokenizer which enables
 * CDATA sections whenever the adjusted current node exists and is not in
 * the HTML namespace, remembers the tokenizer, and notifies the handler.
 *
 * When $namespace is not null the parse is treated as a fragment parse:
 * a virtual context element is created from $namespace and $name, and a
 * virtual <html> element is pushed onto the stack and reported to the
 * handler as the root.
 *
 * @param Tokenizer $tokenizer
 * @param string|null $namespace The fragment context namespace, or null
 *   when a whole document is being parsed
 * @param string|null $name The fragment context element name
 */
public function startDocument( Tokenizer $tokenizer, $namespace, $name ) {
	$cdataEnabled = function () {
		$node = $this->adjustedCurrentNode();
		return $node ? $node->namespace !== HTMLData::NS_HTML : false;
	};
	$tokenizer->setEnableCdataCallback( $cdataEnabled );
	$this->tokenizer = $tokenizer;
	$this->handler->startDocument( $namespace, $name );

	if ( $namespace === null ) {
		// Whole-document parse: nothing more to set up
		return;
	}

	// Fragment parse
	$this->isFragment = true;
	$context = new Element( $namespace, $name, new PlainAttributes );
	$this->fragmentContext = $context;
	$context->isVirtual = true;

	$root = new Element( HTMLData::NS_HTML, 'html', new PlainAttributes );
	$root->isVirtual = true;
	$this->stack->push( $root );
	$this->handler->insertElement( self::ROOT, null, $root, false, 0, 0 );
}
/**
 * Get the adjusted current node.
 *
 * In a fragment parse, when the stack is empty or the current node is the
 * element at stack index 0, this is the fragment context element. In
 * every other case it is the current node of the stack.
 *
 * @return Element|null
 */
public function adjustedCurrentNode() {
	$node = $this->stack->current;
	if ( !$this->isFragment ) {
		return $node;
	}
	if ( $node && $node->stackIndex !== 0 ) {
		return $node;
	}
	return $this->fragmentContext;
}
/**
 * Work out where a new node should be inserted.
 *
 * With no target and an empty stack the node goes at the root. Unless
 * foster parenting is switched on and the target's name is listed in
 * self::$fosterTriggers, the node goes under the target. Otherwise the
 * stack is scanned from the current node downwards: the first <table>
 * found at stack index 1 or above yields "before that table", the first
 * <template> found yields "under that template", and if neither is
 * found the node goes under the element at the bottom of the stack.
 *
 * @param Element|null $target The override target, or null to use the
 *   current node
 * @return array A two-element list [ preposition, reference ], where the
 *   preposition is self::ROOT, self::UNDER or self::BEFORE and the
 *   reference is an Element or null
 */
private function appropriatePlace( $target = null ) {
	$openElements = $this->stack;
	if ( $target === null ) {
		$target = $openElements->current;
		if ( $target === null ) {
			return [ self::ROOT, null ];
		}
	}

	$fostering = $this->fosterParenting
		&& isset( self::$fosterTriggers[$target->htmlName] );
	if ( !$fostering ) {
		return [ self::UNDER, $target ];
	}

	$candidate = null;
	$position = $openElements->length();
	while ( --$position >= 0 ) {
		$candidate = $openElements->item( $position );
		if ( $candidate->htmlName === 'table' && $position >= 1 ) {
			return [ self::BEFORE, $candidate ];
		}
		if ( $candidate->htmlName === 'template' ) {
			return [ self::UNDER, $candidate ];
		}
	}
	return [ self::UNDER, $candidate ];
}
/**
 * Pass a run of character data to the handler, to be inserted at the
 * appropriate place for inserting a node.
 *
 * The five parameters are forwarded to the handler unchanged.
 *
 * @param string $text
 * @param int $start
 * @param int $length
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function insertCharacters( $text, $start, $length, $sourceStart, $sourceLength ) {
	$place = $this->appropriatePlace();
	$this->handler->characters( $place[0], $place[1], $text, $start, $length,
		$sourceStart, $sourceLength );
}
/**
 * Insert an element in the HTML namespace. This is a convenience wrapper
 * which calls insertForeign() with HTMLData::NS_HTML as the namespace.
 *
 * @param string $name The element name
 * @param Attributes $attrs The element's attributes
 * @param bool $void Passed through to insertForeign(); when true the new
 *   element is not pushed onto the stack of open elements
 * @param int $sourceStart
 * @param int $sourceLength
 * @return Element The element created by insertForeign()
 */
public function insertElement( $name, Attributes $attrs, $void, $sourceStart, $sourceLength ) {
return $this->insertForeign( HTMLData::NS_HTML, $name, $attrs, $void,
$sourceStart, $sourceLength );
}
/**
 * Create an element in the given namespace, ask the handler to insert it
 * at the appropriate place for inserting a node, and push it onto the
 * stack of open elements unless it is void.
 *
 * @param string $ns The namespace of the new element
 * @param string $name The element name
 * @param Attributes $attrs The element's attributes
 * @param bool $void When true, the element is not pushed onto the stack
 * @param int $sourceStart
 * @param int $sourceLength
 * @return Element The newly created element
 */
public function insertForeign( $ns, $name, Attributes $attrs, $void,
	$sourceStart, $sourceLength
) {
	$place = $this->appropriatePlace();
	$created = new Element( $ns, $name, $attrs );
	$this->handler->insertElement( $place[0], $place[1], $created, $void,
		$sourceStart, $sourceLength );
	if ( $void ) {
		return $created;
	}
	$this->stack->push( $created );
	return $created;
}
/**
 * Remove the current node from the stack of open elements and report an
 * end tag for it to the handler.
 *
 * @param int $sourceStart
 * @param int $sourceLength
 * @return Element|null Whatever the stack's pop() returned
 */
public function pop( $sourceStart, $sourceLength ) {
	$closed = $this->stack->pop();
	$this->handler->endTag( $closed, $sourceStart, $sourceLength );
	return $closed;
}
/**
 * Forward a DOCTYPE to the handler, then remember its quirks setting in
 * $this->quirks.
 *
 * All six parameters are passed to the handler unchanged.
 *
 * @param string $name
 * @param string $public
 * @param string $system
 * @param mixed $quirks The value stored in $this->quirks
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
$this->handler->doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength );
$this->quirks = $quirks;
}
/**
 * Pass a comment to the handler.
 *
 * @param array|null $place A [ preposition, reference ] pair giving the
 *   insertion location, or null to use the appropriate place for
 *   inserting a node
 * @param string $text The comment text
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function comment( $place, $text, $sourceStart, $sourceLength ) {
	if ( $place === null ) {
		$place = $this->appropriatePlace();
	}
	list( $prep, $ref ) = $place;
	$this->handler->comment( $prep, $ref, $text, $sourceStart, $sourceLength );
}
/**
 * Report a parse error to the handler, unless the ignoreErrors option
 * was set.
 *
 * @param string $text The error message
 * @param int $pos The source position
 */
public function error( $text, $pos ) {
	if ( $this->ignoreErrors ) {
		return;
	}
	$this->handler->error( $text, $pos );
}
/**
 * Ask the handler to merge a set of attributes into an existing element.
 * Nothing happens when the attribute set is empty or when the element is
 * virtual.
 *
 * @param Element $elt The element to receive the attributes
 * @param Attributes $attrs The attributes to merge
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function mergeAttributes( Element $elt, Attributes $attrs, $sourceStart, $sourceLength ) {
	if ( !$attrs->count() || $elt->isVirtual ) {
		return;
	}
	$this->handler->mergeAttributes( $elt, $attrs, $sourceStart, $sourceLength );
}
/**
 * If the stack reports a p element in button scope, close it by calling
 * generateImpliedEndTagsAndPop() with a source length of zero.
 *
 * @param int $pos The source position
 */
public function closePInButtonScope( $pos ) {
	if ( !$this->stack->isInButtonScope( 'p' ) ) {
		return;
	}
	$this->generateImpliedEndTagsAndPop( 'p', $pos, 0 );
}
/**
 * Look through the stack of open elements for elements whose name is not
 * in the allowed list. If there are any, raise a single error which
 * lists each offending name once, in the order first encountered when
 * walking from the current node to the bottom of the stack.
 *
 * Does nothing when the ignoreErrors option is set.
 *
 * @param array $allowed An array with the HTML element names in the key
 * @param int $pos The source position
 */
public function checkUnclosed( $allowed, $pos ) {
	if ( $this->ignoreErrors ) {
		return;
	}

	$openElements = $this->stack;
	// Keyed by name so that each name is reported only once
	$offenders = [];
	$depth = $openElements->length();
	while ( $depth-- > 0 ) {
		$tagName = $openElements->item( $depth )->htmlName;
		if ( isset( $allowed[$tagName] ) ) {
			continue;
		}
		$offenders[$tagName] = true;
	}

	if ( !$offenders ) {
		return;
	}
	$names = implode( ', ', array_keys( $offenders ) );
	$this->error( "closing unclosed $names", $pos );
}
/**
 * Reconstruct the active formatting elements.
 *
 * Every entry after the last marker or open element in the list of
 * active formatting elements (or every entry, if there is no marker or
 * open element) is re-created as a new HTML element, inserted via
 * insertForeign(), and substituted for the old entry in the list.
 *
 * @author C. Scott Ananian, Tim Starling
 *
 * @param int $sourceStart
 */
public function reconstructAFE( $sourceStart ) {
	$cursor = $this->afe->getTail();
	// Nothing to do if the list is empty, or if its last entry is a marker
	// or an element which is still open.
	if ( !$cursor || $cursor instanceof Marker || $cursor->stackIndex !== null ) {
		return;
	}

	// Walk backward until a marker or an open element is found. If one is
	// found, step forward again so that the cursor rests on the first
	// entry needing reconstruction; otherwise the cursor ends up on the
	// first entry of the list.
	while ( $cursor->prevAFE ) {
		$cursor = $cursor->prevAFE;
		if ( $cursor instanceof Marker || $cursor->stackIndex !== null ) {
			$cursor = $cursor->nextAFE;
			break;
		}
	}

	// Walk forward, re-creating each entry and putting the new element in
	// the old entry's place in the list.
	do {
		$recreated = $this->insertForeign( HTMLData::NS_HTML, $cursor->name,
			$cursor->attrs, false, $sourceStart, 0 );
		$this->afe->replace( $cursor, $recreated );
		$cursor = $recreated->nextAFE;
	} while ( $cursor );
}
/**
 * Debugging hook called from adoptionAgency(). It is currently a no-op;
 * uncomment the print statement in the body to get trace output.
 *
 * @param string $msg The message to trace
 */
private function trace( $msg ) {
// print "[AAA] $msg\n";
}
/**
 * Run the "adoption agency algorithm" (AAA) for the given subject
 * tag name.
 *
 * The bracketed numbers in the comments below appear to be the step
 * numbers of the algorithm as given in the HTML specification.
 *
 * Two kinds of work are deferred rather than done in place: removals
 * from the stack of open elements found by the inner loop are recorded
 * in $stackRemovals and applied when the stack is rebuilt at the end of
 * each outer iteration, and handler insertions are queued in $insertions
 * and executed in reverse order once the inner loop has finished.
 *
 * @author C. Scott Ananian, Tim Starling
 *
 * @param string $subject The subject tag name.
 * @param integer $sourceStart
 * @param integer $sourceLength
 */
public function adoptionAgency( $subject, $sourceStart, $sourceLength ) {
$afe = $this->afe;
$stack = $this->stack;
$handler = $this->handler;
// If the current node is an HTML element whose tag name is subject,
// and the current node is not in the list of active formatting
// elements, then pop the current node off the stack of open
// elements and abort these steps. [1]
if (
$stack->current->htmlName === $subject &&
!$afe->isInList( $stack->current )
) {
$this->pop( $sourceStart, $sourceLength );
return;
}
$this->trace( "AAA invoked on $subject" );
// Outer loop: If outer loop counter is greater than or
// equal to eight, then abort these steps. [2-4]
for ( $outer = 0; $outer < 8; $outer++ ) {
$this->trace( "Outer $outer" );
$this->trace( "AFE\n" . $afe->dump() . "STACK\n" . $stack->dump() );
// Let the formatting element be the last element in the list
// of active formatting elements that: is between the end of
// the list and the last scope marker in the list, if any, or
// the start of the list otherwise, and has the same tag name
// as the token. [5]
$fmtElt = $afe->findElementByName( $subject );
// If there is no such node, then abort these steps and instead
// act as described in the "any other end tag" entry above.
if ( !$fmtElt ) {
$this->anyOtherEndTag( $subject, $sourceStart, $sourceLength );
return;
}
// Otherwise, if there is such a node, but that node is not in
// the stack of open elements, then this is a parse error;
// remove the element from the list, and abort these steps. [6]
$fmtEltIndex = $fmtElt->stackIndex;
if ( $fmtEltIndex === null ) {
$this->error( 'closing tag matched an active formatting element ' .
'which is not in the stack', $sourceStart );
$afe->remove( $fmtElt );
return;
}
// Otherwise, if there is such a node, and that node is also in
// the stack of open elements, but the element is not in scope,
// then this is a parse error; ignore the token, and abort
// these steps. [7]
if ( !$stack->isElementInScope( $fmtElt ) ) {
$this->error( 'end tag matched a start tag which is not in scope',
$sourceStart );
return;
}
// If formatting element is not the current node, this is a parse
// error. (But do not abort these steps.) [8]
if ( $fmtElt !== $stack->current ) {
$this->error( 'end tag matched a formatting element which was ' .
'not the current node', $sourceStart );
}
// Let the furthest block be the topmost node in the stack of
// open elements that is lower in the stack than the formatting
// element, and is an element in the special category. There
// might not be one. [9]
$furthestBlock = null;
$furthestBlockIndex = -1;
$stackLength = $stack->length();
for ( $i = $fmtEltIndex+1; $i < $stackLength; $i++ ) {
$item = $stack->item( $i );
if ( isset( HTMLData::$special[$item->namespace][$item->name] ) ) {
$furthestBlock = $item;
$furthestBlockIndex = $i;
break;
}
}
// If there is no furthest block, then the UA must skip the
// subsequent steps and instead just pop all the nodes from the
// bottom of the stack of open elements, from the current node up
// to and including the formatting element, and remove the
// formatting element from the list of active formatting
// elements. [10]
if ( !$furthestBlock ) {
$this->trace( "no furthest block" );
$this->popAllUpToElement( $fmtElt, $sourceStart, $sourceLength );
$afe->remove( $fmtElt );
return;
}
$this->trace( "furthestBlock = " . $furthestBlock->getDebugTag() );
// Let the common ancestor be the element immediately above the
// formatting element in the stack of open elements. [11]
$ancestor = $stack->item( $fmtEltIndex - 1 );
// Let a bookmark note the position of the formatting element in
// the list of active formatting elements relative to the elements
// on either side of it in the list. [12]
$bookmark = new Marker( 'bookmark' );
$afe->insertAfter( $fmtElt, $bookmark );
// Let node and last node be the furthest block. [13]
$lastNode = $furthestBlock;
$nodeIndex = $furthestBlockIndex;
$isAFE = false;
// Stack indexes (as keys) of the nodes which the inner loop decided to
// remove from the stack. The removal itself happens in the rebuild
// further down.
$stackRemovals = [];
// Queue of [ preposition, reference, element ] triples to be passed to
// the handler's insertElement() after the inner loop.
$insertions = [];
// Inner loop
for ( $inner = 1; true; $inner++ ) {
// Let node be the element immediately above node in the stack
// of open elements, or if node is no longer in the stack of
// open elements (e.g. because it got removed by this
// algorithm), the element that was immediately above node in
// the stack of open elements before node was removed. [13.3]
$node = $stack->item( --$nodeIndex );
// If node is the formatting element, then go to the next step
// in the overall algorithm. [13.4]
if ( $node === $fmtElt ) {
break;
}
$this->trace( "inner $inner, {$node->getDebugTag()} is not fmtElt" );
// If the inner loop counter is greater than three and node
// is in the list of active formatting elements, then remove
// node from the list of active formatting elements. [13.5]
$isAFE = $afe->isInList( $node );
if ( $inner > 3 && $isAFE ) {
$afe->remove( $node );
$isAFE = false;
}
// If node is not in the list of active formatting elements,
// then remove node from the stack of open elements and then
// go back to the step labeled inner loop. [13.6]
if ( !$isAFE ) {
$stackRemovals[$nodeIndex] = true;
continue;
}
// Create an element for the token for which the element node
// was created with common ancestor as the intended parent,
// replace the entry for node in the list of active formatting
// elements with an entry for the new element, replace the
// entry for node in the stack of open elements with an entry
// for the new element, and let node be the new element. [13.7]
$newElt = new Element(
$node->namespace, $node->name, $node->attrs );
$afe->replace( $node, $newElt );
$stack->replace( $node, $newElt );
$node = $newElt;
// If last node is the furthest block, then move the
// aforementioned bookmark to be immediately after the new node
// in the list of active formatting elements. [13.8]
if ( $lastNode === $furthestBlock ) {
$afe->remove( $bookmark );
$afe->insertAfter( $newElt, $bookmark );
}
// Insert last node into node, first removing it from its
// previous parent node if any. [13.9]
$insertions[] = [ self::UNDER, $node, $lastNode ];
// Let last node be node. [13.10]
$lastNode = $node;
}
// Insert whatever last node ended up being in the previous step at
// the appropriate place for inserting a node, but using common
// ancestor as the override target. [14]
list( $prep, $ref ) = $this->appropriatePlace( $ancestor );
$insertions[] = [ $prep, $ref, $lastNode ];
// Execute queued insertions in reverse order.
// This has the same effect but allows the handler to assume that
// elements are always in the tree.
for ( $i = count( $insertions ) - 1; $i >= 0; $i-- ) {
$ins = $insertions[$i];
$handler->insertElement( $ins[0], $ins[1], $ins[2], false, $sourceStart, 0 );
}
// Create an element for the token for which the formatting element
// was created, with furthest block as the intended parent. [15]
$newElt2 = new Element(
$fmtElt->namespace, $fmtElt->name, $fmtElt->attrs );
// Take all of the child nodes of the furthest block and append
// them to the element created in the last step. [16]
// Append the new element to the furthest block. [17]
$handler->reparentChildren( $furthestBlock, $newElt2, $sourceStart );
// Remove the formatting element from the list of active formatting
// elements, and insert the new element into the list of active
// formatting elements at the position of the aforementioned
// bookmark. [18]
$afe->remove( $fmtElt );
$afe->replace( $bookmark, $newElt2 );
// Remove the formatting element from the stack of open elements,
// and insert the new element into the stack of open elements
// immediately below the position of the furthest block in that
// stack. [19]
$this->trace( "Removing " . $stack->length() . "-" . ( $furthestBlockIndex + 1 ) );
$this->trace( "Inserting the new element below $furthestBlockIndex" );
$this->trace( "Removing stack elements " .
implode( ', ', array_keys( $stackRemovals ) ) );
// Make a temporary stack with the elements we are going to push back in
$tempStack = [];
// Stash the elements up to the furthest block
for ( $index = $stack->length() - 1; $index > $furthestBlockIndex; $index-- ) {
$tempStack[] = $stack->pop();
}
// Add the new element
$tempStack[] = $newElt2;
// Stash the elements up to the formatting element
// ($index carries over from the loop above, where it finished equal to
// $furthestBlockIndex; the "0" initialiser is a deliberate no-op.)
for ( 0; $index > $fmtEltIndex; $index-- ) {
$elt = $stack->pop();
// Drop elements previously marked for removal
if ( isset( $stackRemovals[$index] ) ) {
$this->trace( "ending marked node {$elt->getDebugTag()}" );
$handler->endTag( $elt, $sourceStart, 0 );
} else {
$tempStack[] = $elt;
}
}
// Remove the formatting element
$elt = $stack->pop();
$this->trace( "ending formatting element {$elt->getDebugTag()}" );
$handler->endTag( $elt, $sourceStart, 0 );
// Reinsert
// ($tempStack was filled from the top of the stack downwards, so it is
// reversed to push the elements back in their original order.)
foreach ( array_reverse( $tempStack ) as $elt ) {
$stack->push( $elt );
}
}
}
/**
 * Handle an end tag by searching the stack of open elements, from the
 * current node downwards, for an element with a matching name.
 *
 * If a match is found, implied end tags are generated (excluding $name),
 * an error is raised if the match is not then the current node, and
 * everything from the current node down to and including the match is
 * popped. If an element in the special category is reached first, an
 * error is raised and the end tag is ignored. If the bottom of the stack
 * is reached without either happening, nothing is done.
 *
 * @param string $name The end tag name
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function anyOtherEndTag( $name, $sourceStart, $sourceLength ) {
	$openElements = $this->stack;
	$position = $openElements->length();
	while ( --$position >= 0 ) {
		$candidate = $openElements->item( $position );

		if ( $candidate->htmlName !== $name ) {
			// If node is in the special category, then this is a parse
			// error; ignore the token, and abort these steps
			if ( isset( HTMLData::$special[$candidate->namespace][$candidate->name] ) ) {
				$this->error( "cannot implicitly close a special element <{$candidate->htmlName}>",
					$sourceStart );
				return;
			}
			continue;
		}

		$this->generateImpliedEndTags( $name, $sourceStart );
		// If node is not the current node, then this is a parse error
		if ( $candidate !== $openElements->current ) {
			$this->error( 'end tag matched an element which was not the current node',
				$sourceStart );
		}
		// Pop all the nodes above the match, giving each a zero-length end
		// tag. The count is taken once, before anything is popped.
		$above = $openElements->length() - 1 - $position;
		while ( $above-- > 0 ) {
			$closed = $openElements->pop();
			$this->handler->endTag( $closed, $sourceStart, 0 );
		}
		// Finally pop the match itself, which gets the real source length
		$closed = $openElements->pop();
		$this->handler->endTag( $closed, $sourceStart, $sourceLength );
		return;
	}
}
/**
 * Generate implied end tags, optionally with an element to exclude.
 *
 * While the current node's name is listed in self::$impliedEndTags and
 * is not the excluded name, the node is popped and a zero-length end tag
 * is reported to the handler.
 *
 * @param string|null $name The name to exclude
 * @param int $pos The source position
 */
public function generateImpliedEndTags( $name, $pos ) {
	$openElements = $this->stack;
	for ( $top = $openElements->current; $top; $top = $openElements->current ) {
		$tag = $top->htmlName;
		if ( $tag === $name || !isset( self::$impliedEndTags[$tag] ) ) {
			break;
		}
		$closed = $openElements->pop();
		$this->handler->endTag( $closed, $pos, 0 );
	}
}
/**
 * Generate all implied end tags thoroughly. This was introduced in
 * HTML 5.1 in order to expand the set of elements which can be implicitly
 * closed by an end tag. (In the HTML specification this step is used when
 * handling a </template> end tag -- confirm against the callers.)
 *
 * While the current node's name is listed in
 * self::$thoroughlyImpliedEndTags, the node is popped and a zero-length
 * end tag is reported to the handler.
 *
 * @param int $pos The source position
 */
public function generateImpliedEndTagsThoroughly( $pos ) {
	$openElements = $this->stack;
	for ( $top = $openElements->current; $top; $top = $openElements->current ) {
		if ( !isset( self::$thoroughlyImpliedEndTags[$top->htmlName] ) ) {
			break;
		}
		$closed = $openElements->pop();
		$this->handler->endTag( $closed, $pos, 0 );
	}
}
/**
 * Generate implied end tags, with an element to exclude, and if the
 * current element is not now the named excluded element, raise an error.
 * Then, pop all elements until an element with the name is popped from
 * the list.
 *
 * The error is also raised when the stack is empty after the implied end
 * tags have been generated. popAllUpToName() copes with an empty stack,
 * so no further action is needed in that case.
 *
 * @param string $name The name to exclude
 * @param int $sourceStart
 * @param int $sourceLength Passed to popAllUpToName(), which applies it
 *   to the end tag of the named element only
 */
public function generateImpliedEndTagsAndPop( $name, $sourceStart, $sourceLength ) {
	$this->generateImpliedEndTags( $name, $sourceStart );
	$current = $this->stack->current;
	// Check for a missing current node explicitly rather than reading a
	// property of null.
	if ( !$current || $current->htmlName !== $name ) {
		// The name is shown in end tag form, i.e. "</name>"
		$this->error( "found </$name> but elements are open that cannot " .
			"have implied end tags, closing them", $sourceStart );
	}
	$this->popAllUpToName( $name, $sourceStart, $sourceLength );
}
/**
 * Pop elements from the stack of open elements until the given element
 * has been popped, or the stack is empty. An end tag is reported to the
 * handler for every popped element; only the end tag of the given
 * element carries $sourceLength, the others have a length of zero.
 *
 * @param Element $elt The element to stop at (it is popped too)
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function popAllUpToElement( Element $elt, $sourceStart, $sourceLength ) {
	while ( ( $closed = $this->stack->pop() ) ) {
		$isTarget = ( $closed === $elt );
		$this->handler->endTag( $closed, $sourceStart, $isTarget ? $sourceLength : 0 );
		if ( $isTarget ) {
			break;
		}
	}
}
/**
 * Pop elements from the stack of open elements until an element with the
 * given name has been popped, or the stack is empty. An end tag is
 * reported to the handler for every popped element; only the end tag of
 * the named element carries $sourceLength, the others have a length of
 * zero.
 *
 * @param string $name The HTML element name to stop at
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function popAllUpToName( $name, $sourceStart, $sourceLength ) {
	while ( ( $closed = $this->stack->pop() ) ) {
		$isTarget = ( $closed->htmlName === $name );
		$this->handler->endTag( $closed, $sourceStart, $isTarget ? $sourceLength : 0 );
		if ( $isTarget ) {
			break;
		}
	}
}
/**
 * Pop elements from the stack of open elements until an element whose
 * name is a key of $names has been popped, or the stack is empty. An end
 * tag is reported to the handler for every popped element; only the end
 * tag of the matching element carries $sourceLength, the others have a
 * length of zero.
 *
 * @param array $names An array with the HTML element names in the key
 * @param int $sourceStart
 * @param int $sourceLength
 */
public function popAllUpToNames( $names, $sourceStart, $sourceLength ) {
	while ( ( $closed = $this->stack->pop() ) ) {
		$isTarget = isset( $names[$closed->htmlName] );
		$this->handler->endTag( $closed, $sourceStart, $isTarget ? $sourceLength : 0 );
		if ( $isTarget ) {
			break;
		}
	}
}
/**
 * The "clear stack back to" algorithm. Similar to popAllUpToNames(),
 * except that the matching element is left on the stack rather than
 * popped, and every popped element gets a zero-length end tag.
 *
 * @param array $names An array with the HTML element names in the key
 * @param int $pos The source position
 * @throws TreeBuilderError If the stack is emptied without a matching
 *   element being found
 */
public function clearStackBack( $names, $pos ) {
	$openElements = $this->stack;
	for (
		$top = $openElements->current;
		$top && !isset( $names[$top->htmlName] );
		$top = $openElements->current
	) {
		$this->pop( $pos, 0 );
	}
	if ( !$top ) {
		throw new TreeBuilderError( 'clearStackBack: stack is unexpectedly empty' );
	}
}
/**
 * Finish the parse. Every element still on the stack of open elements is
 * popped and a zero-length end tag is reported for it, except that in a
 * fragment parse elements named "html" are popped without an end tag.
 * The handler is then told that the document has ended, and the
 * per-document state (active formatting elements, head and form element
 * pointers, tokenizer callback and tokenizer reference) is released.
 *
 * @param int $pos The source position
 */
public function stopParsing( $pos ) {
	$openElements = $this->stack;
	while ( $openElements->current ) {
		$closed = $openElements->pop();
		if ( $this->isFragment && $closed->htmlName === 'html' ) {
			continue;
		}
		$this->handler->endTag( $closed, $pos, 0 );
	}
	$this->handler->endDocument( $pos );

	$this->afe = new ActiveFormattingElements;
	$this->headElement = null;
	$this->formElement = null;
	$this->tokenizer->setEnableCdataCallback( null );
	$this->tokenizer = null;
}
}