3 namespace RemexHtml\TreeBuilder;
4 use RemexHtml\HTMLData;
5 use RemexHtml\Tokenizer\Attributes;
8 * An implementation of the "stack of open elements" which includes a cache of
9 * elements currently in the various kinds of scope. It presumably has good
10 * worst-case performance at the expense of somewhat slower updates.
12 class CachingStack extends Stack {
13 const SCOPE_DEFAULT = 0;
15 const SCOPE_BUTTON = 2;
16 const SCOPE_TABLE = 3;
17 const SCOPE_SELECT = 4;
19 private static $tableScopes = [ self::SCOPE_DEFAULT, self::SCOPE_LIST, self::SCOPE_BUTTON,
20 self::SCOPE_TABLE, self::SCOPE_SELECT ];
21 private static $regularScopes = [ self::SCOPE_DEFAULT, self::SCOPE_LIST, self::SCOPE_BUTTON,
23 private static $listScopes = [ self::SCOPE_LIST, self::SCOPE_SELECT ];
24 private static $buttonScopes = [ self::SCOPE_BUTTON, self::SCOPE_SELECT ];
25 private static $selectOnly = [ self::SCOPE_SELECT ];
27 private static $mathBreakers = [
33 'annotation-xml' => true
36 private static $svgBreakers = [
37 'foreignObject' => true,
43 * If you compile every predicate of the form "an X element in Y scope" in
44 * the HTML 5 spec, you discover that every element name X corresponds to
45 * at most one such scope Y. This is useful because it means when we see
46 * a new element, we need to add it to at most one scope cache.
48 * This is a list of such statements in the spec, and the scope they relate
49 * to. All formatting elements are included as SCOPE_DEFAULT since the AAA
50 * involves pulling an item out of the AFE list and checking if it is in
53 static private $predicateMap = [
54 'a' => self::SCOPE_DEFAULT,
55 'address' => self::SCOPE_DEFAULT,
56 'applet' => self::SCOPE_DEFAULT,
57 'article' => self::SCOPE_DEFAULT,
58 'aside' => self::SCOPE_DEFAULT,
59 'b' => self::SCOPE_DEFAULT,
60 'big' => self::SCOPE_DEFAULT,
61 'blockquote' => self::SCOPE_DEFAULT,
62 'body' => self::SCOPE_DEFAULT,
63 'button' => self::SCOPE_DEFAULT,
64 'caption' => self::SCOPE_TABLE,
65 'center' => self::SCOPE_DEFAULT,
66 'code' => self::SCOPE_DEFAULT,
67 'dd' => self::SCOPE_DEFAULT,
68 'details' => self::SCOPE_DEFAULT,
69 'dialog' => self::SCOPE_DEFAULT,
70 'dir' => self::SCOPE_DEFAULT,
71 'div' => self::SCOPE_DEFAULT,
72 'dl' => self::SCOPE_DEFAULT,
73 'dt' => self::SCOPE_DEFAULT,
74 'em' => self::SCOPE_DEFAULT,
75 'fieldset' => self::SCOPE_DEFAULT,
76 'figcaption' => self::SCOPE_DEFAULT,
77 'figure' => self::SCOPE_DEFAULT,
78 'font' => self::SCOPE_DEFAULT,
79 'footer' => self::SCOPE_DEFAULT,
80 'form' => self::SCOPE_DEFAULT,
81 'h1' => self::SCOPE_DEFAULT,
82 'h2' => self::SCOPE_DEFAULT,
83 'h3' => self::SCOPE_DEFAULT,
84 'h4' => self::SCOPE_DEFAULT,
85 'h5' => self::SCOPE_DEFAULT,
86 'h6' => self::SCOPE_DEFAULT,
87 'header' => self::SCOPE_DEFAULT,
88 'hgroup' => self::SCOPE_DEFAULT,
89 'i' => self::SCOPE_DEFAULT,
90 'li' => self::SCOPE_LIST,
91 'listing' => self::SCOPE_DEFAULT,
92 'main' => self::SCOPE_DEFAULT,
93 'marquee' => self::SCOPE_DEFAULT,
94 'menu' => self::SCOPE_DEFAULT,
95 'nav' => self::SCOPE_DEFAULT,
96 'nobr' => self::SCOPE_DEFAULT,
97 'object' => self::SCOPE_DEFAULT,
98 'ol' => self::SCOPE_DEFAULT,
99 'p' => self::SCOPE_BUTTON,
100 'pre' => self::SCOPE_DEFAULT,
101 'ruby' => self::SCOPE_DEFAULT,
102 's' => self::SCOPE_DEFAULT,
103 'section' => self::SCOPE_DEFAULT,
104 'select' => self::SCOPE_SELECT,
105 'small' => self::SCOPE_DEFAULT,
106 'strike' => self::SCOPE_DEFAULT,
107 'strong' => self::SCOPE_DEFAULT,
108 'summary' => self::SCOPE_DEFAULT,
109 'table' => self::SCOPE_TABLE,
110 'tbody' => self::SCOPE_TABLE,
111 'td' => self::SCOPE_TABLE,
112 'tfoot' => self::SCOPE_TABLE,
113 'th' => self::SCOPE_TABLE,
114 'thead' => self::SCOPE_TABLE,
115 'tr' => self::SCOPE_TABLE,
116 'tt' => self::SCOPE_DEFAULT,
117 'u' => self::SCOPE_DEFAULT,
118 'ul' => self::SCOPE_DEFAULT,
122 * The stack of open elements
124 private $elements = [];
127 * A cache of the elements which are currently in a given scope.
128 * The first key is the scope ID, the second key is the element name, and the
129 * value is the first Element in a singly-linked list of Element objects,
130 * linked by $element->nextScope.
132 * @todo Benchmark time and memory compared to an array stack instead of an
133 * SLL. The SLL here is maybe not quite so well justified as some other
136 * @var Element[int][string]
139 self::SCOPE_DEFAULT => [],
140 self::SCOPE_LIST => [],
141 self::SCOPE_BUTTON => [],
142 self::SCOPE_TABLE => [],
143 self::SCOPE_SELECT => []
147 * This is the part of the scope cache which stores scope lists for objects
148 * which are not currently in scope. The first key is the scope ID, the
149 * second key is the stack index, the third key is the element name.
151 * @var Element[int][int][string]
153 private $scopeStacks = [
154 self::SCOPE_DEFAULT => [],
155 self::SCOPE_LIST => [],
156 self::SCOPE_BUTTON => [],
157 self::SCOPE_TABLE => [],
158 self::SCOPE_SELECT => []
162 * The number of <template> elements in the stack of open elements. This
163 * speeds up the hot function hasTemplate().
165 private $templateCount;
168 * Get the list of scopes that are broken for a given namespace and
171 private function getBrokenScopes( $ns, $name ) {
172 if ( $ns === HTMLData::NS_HTML ) {
177 return self::$tableScopes;
185 return self::$regularScopes;
189 return self::$listScopes;
192 return self::$buttonScopes;
199 return self::$selectOnly;
201 } elseif ( $ns === HTMLData::NS_MATHML ) {
202 if ( isset( self::$mathBreakers[$name] ) ) {
203 return self::$regularScopes;
205 return self::$selectOnly;
207 } elseif ( $ns === HTMLData::NS_SVG ) {
208 if ( isset( self::$svgBreakers[$name] ) ) {
209 return self::$regularScopes;
211 return self::$selectOnly;
214 return self::$selectOnly;
218 public function push( Element $elt ) {
219 // Update the stack store
220 $n = count( $this->elements );
221 $this->elements[$n] = $elt;
222 // Update the current node and index cache
223 $this->current = $elt;
224 $elt->stackIndex = $n;
225 // Update the scope cache
226 $ns = $elt->namespace;
228 foreach ( $this->getBrokenScopes( $ns, $name ) as $scope ) {
229 $this->scopeStacks[$scope][] = $this->scopes[$scope];
230 $this->scopes[$scope] = [];
232 if ( $ns === HTMLData::NS_HTML && isset( self::$predicateMap[$name] ) ) {
233 $scopeId = self::$predicateMap[$name];
234 $scope =& $this->scopes[$scopeId];
235 $elt->nextScope = isset( $scope[$name] ) ? $scope[$name] : null;
236 $scope[$name] = $elt;
239 // Update the template count
240 if ( $ns === HTMLData::NS_HTML && $name === 'template' ) {
241 $this->templateCount++;
245 public function pop() {
246 $n = count( $this->elements );
248 throw new TreeBuilderError( __METHOD__.': stack empty' );
250 // Update the stack store, index cache and current node
251 $elt = array_pop( $this->elements );
253 $elt->stackIndex = null;
254 $ns = $elt->namespace;
256 $this->current = $n ? $this->elements[$n - 1] : null;
257 // Update the scope cache
258 if ( $ns === HTMLData::NS_HTML && isset( self::$predicateMap[$name] ) ) {
259 $scope = self::$predicateMap[$name];
260 $this->scopes[$scope][$name] = $elt->nextScope;
261 $elt->nextScope = null;
263 foreach ( $this->getBrokenScopes( $ns, $name ) as $scope ) {
264 $this->scopes[$scope] = array_pop( $this->scopeStacks[$scope] );
266 // Update the template count
267 if ( $ns === HTMLData::NS_HTML && $name === 'template' ) {
268 $this->templateCount--;
273 public function replace( Element $oldElt, Element $elt ) {
274 $idx = $oldElt->stackIndex;
275 // AAA calls this function only for elements with the same name, which
276 // simplifies the scope cache update, and eliminates the template count
278 if ( $oldElt->name !== $elt->name || $oldElt->namespace !== $elt->namespace ) {
279 throw new TreeBuilderError( __METHOD__.' can only be called for elements of the same name' );
281 $ns = $elt->namespace;
283 // Find the old element in its scope list and replace it
284 if ( $ns === HTMLData::NS_HTML && isset( self::$predicateMap[$name] ) ) {
285 $scopeId = self::$predicateMap[$name];
286 $scopeElt = $this->scopes[$scopeId][$name];
287 if ( $scopeElt === $oldElt ) {
288 $this->scopes[$scopeId][$name] = $elt;
289 $elt->nextScope = $scopeElt->nextScope;
290 $scopeElt->nextScope = null;
292 $nextElt = $scopeElt->nextScope;
294 if ( $nextElt === $oldElt ) {
295 $scopeElt->nextScope = $elt;
296 $elt->nextScope = $nextElt->nextScope;
297 $scopeElt->nextScope = null;
300 $scopeElt = $scopeElt->nextScope;
301 $nextElt = $scopeElt->nextScope;
304 throw new TreeBuilderError( __METHOD__.': cannot find old element in scope cache' );
308 // Replace the stack element
309 $this->elements[$idx] = $elt;
310 if ( $idx === count( $this->elements ) - 1 ) {
311 $this->current = $elt;
313 $oldElt->stackIndex = null;
314 $elt->stackIndex = $idx;
317 public function remove( Element $elt ) {
319 $eltIndex = $elt->stackIndex;
320 if ( $eltIndex === null ) {
321 throw new TreeBuilderError( __METHOD__ . ': element not in stack' );
323 $n = count( $this->elements );
324 for ( $i = $n - 1; $i > $eltIndex; $i-- ) {
325 $tempStack[] = $this->pop();
328 foreach ( array_reverse( $tempStack ) as $temp ) {
329 $this->push( $temp );
333 public function isInScope( $name ) {
334 if ( self::$predicateMap[$name] !== self::SCOPE_DEFAULT ) {
335 throw new TreeBuilderError( "Unexpected predicate: \"$name is in scope\"" );
337 return !empty( $this->scopes[self::SCOPE_DEFAULT][$name] );
340 public function isElementInScope( Element $elt ) {
342 if ( self::$predicateMap[$name] !== self::SCOPE_DEFAULT ) {
343 throw new TreeBuilderError( "Unexpected predicate: \"$name is in scope\"" );
345 if ( !empty( $this->scopes[self::SCOPE_DEFAULT][$name] ) ) {
346 $scopeMember = $this->scopes[self::SCOPE_DEFAULT][$name];
347 while ( $scopeMember ) {
348 if ( $scopeMember === $elt ) {
351 $scopeMember = $scopeMember->nextScope;
357 public function isOneOfSetInScope( $names ) {
358 foreach ( $names as $name => $unused ) {
359 if ( $this->isInScope( $name ) ) {
366 public function isInListScope( $name ) {
367 if ( self::$predicateMap[$name] !== self::SCOPE_LIST ) {
368 throw new TreeBuilderError( "Unexpected predicate: \"$name is in list scope\"" );
370 return !empty( $this->scopes[self::SCOPE_LIST][$name] );
373 public function isInButtonScope( $name ) {
374 if ( self::$predicateMap[$name] !== self::SCOPE_BUTTON ) {
375 throw new TreeBuilderError( "Unexpected predicate: \"$name is in button scope\"" );
377 return !empty( $this->scopes[self::SCOPE_BUTTON][$name] );
380 public function isInTableScope( $name ) {
381 if ( self::$predicateMap[$name] !== self::SCOPE_TABLE ) {
382 throw new TreeBuilderError( "Unexpected predicate: \"$name is in table scope\"" );
384 return !empty( $this->scopes[self::SCOPE_TABLE][$name] );
387 public function isInSelectScope( $name ) {
388 if ( self::$predicateMap[$name] !== self::SCOPE_SELECT ) {
389 throw new TreeBuilderError( "Unexpected predicate: \"$name is in select scope\"" );
391 return !empty( $this->scopes[self::SCOPE_SELECT][$name] );
394 public function item( $idx ) {
395 return $this->elements[$idx];
398 public function length() {
399 return count( $this->elements );
402 public function hasTemplate() {
403 return (bool)$this->templateCount;
406 public function dump() {
407 return parent::dump() .
408 $this->scopeDump( self::SCOPE_DEFAULT, 'In scope' ) .
409 $this->scopeDump( self::SCOPE_LIST, 'In list scope' ) .
410 $this->scopeDump( self::SCOPE_BUTTON, 'In button scope' ) .
411 $this->scopeDump( self::SCOPE_TABLE, 'In table scope' ) .
412 $this->scopeDump( self::SCOPE_SELECT, 'In select scope' ) . "\n";
415 private function scopeDump( $scopeId, $scopeName ) {
416 if ( count( $this->scopes[$scopeId] ) ) {
417 return "$scopeName: " . implode( ', ', array_keys( $this->scopes[$scopeId] ) ) . "\n";