3 namespace RemexHtml\TreeBuilder;
4 use RemexHtml\HTMLData;
5 use RemexHtml\Tokenizer\Attributes;
6 use RemexHtml\Tokenizer\TokenHandler;
7 use RemexHtml\Tokenizer\Tokenizer;
/**
 * This is the approximate equivalent of the "tree construction dispatcher" in
 * the spec. It receives token events and distributes them to the appropriate
 * insertion mode class. It also implements some things specific to the
 * insertion mode concept:
 *   - "Reset the insertion mode appropriately"
 *   - The stack of template insertion modes
 *   - The "original insertion mode"
 */
18 class Dispatcher implements TokenHandler {
/**
 * The insertion mode indexes.
 *
 * The values run contiguously from 1 to 26. They are used as the keys of
 * self::$handlerClasses, and therefore of the dispatch table built in
 * startDocument().
 */
const INITIAL = 1;
const BEFORE_HTML = 2;
const BEFORE_HEAD = 3;
const IN_HEAD = 4;
const IN_HEAD_NOSCRIPT = 5;
const AFTER_HEAD = 6;
const IN_BODY = 7;
const TEXT = 8;
const IN_TABLE = 9;
const IN_TABLE_TEXT = 10;
const IN_CAPTION = 11;
const IN_COLUMN_GROUP = 12;
const IN_TABLE_BODY = 13;
const IN_ROW = 14;
const IN_CELL = 15;
const IN_SELECT = 16;
const IN_SELECT_IN_TABLE = 17;
const IN_TEMPLATE = 18;
const AFTER_BODY = 19;
const IN_FRAMESET = 20;
const AFTER_FRAMESET = 21;
const AFTER_AFTER_BODY = 22;
const AFTER_AFTER_FRAMESET = 23;
const IN_FOREIGN_CONTENT = 24;
const IN_PRE = 25;
const IN_TEXTAREA = 26;
/**
 * The handler class for each insertion mode, keyed by insertion mode index.
 * startDocument() creates one instance of every class listed here.
 */
protected static $handlerClasses = [
    self::INITIAL => Initial::class,
    self::BEFORE_HTML => BeforeHtml::class,
    self::BEFORE_HEAD => BeforeHead::class,
    self::IN_HEAD => InHead::class,
    self::IN_HEAD_NOSCRIPT => InHeadNoscript::class,
    self::AFTER_HEAD => AfterHead::class,
    self::IN_BODY => InBody::class,
    self::TEXT => Text::class,
    self::IN_TABLE => InTable::class,
    self::IN_TABLE_TEXT => InTableText::class,
    self::IN_CAPTION => InCaption::class,
    self::IN_COLUMN_GROUP => InColumnGroup::class,
    self::IN_TABLE_BODY => InTableBody::class,
    self::IN_ROW => InRow::class,
    self::IN_CELL => InCell::class,
    self::IN_SELECT => InSelect::class,
    self::IN_SELECT_IN_TABLE => InSelectInTable::class,
    self::IN_TEMPLATE => InTemplate::class,
    self::AFTER_BODY => AfterBody::class,
    self::IN_FRAMESET => InFrameset::class,
    self::AFTER_FRAMESET => AfterFrameset::class,
    self::AFTER_AFTER_BODY => AfterAfterBody::class,
    self::AFTER_AFTER_FRAMESET => AfterAfterFrameset::class,
    self::IN_FOREIGN_CONTENT => InForeignContent::class,
    self::IN_PRE => InPre::class,
    self::IN_TEXTAREA => InTextarea::class,
];
// Public shortcuts for "using the rules for" actions. They are assigned in
// startDocument() and set back to null in endDocument().
public $inHead;
public $inBody;
public $inTable;
public $inSelect;
public $inTemplate;
public $inForeign;

/**
 * The tree builder passed to the constructor
 * @var TreeBuilder
 */
protected $builder;

/**
 * The InsertionMode object for the current insertion mode in HTML content
 * @var InsertionMode|null
 */
protected $handler;

/**
 * An array mapping insertion mode indexes to InsertionMode objects
 * @var InsertionMode[]
 */
protected $dispatchTable;

/**
 * The insertion mode index
 * @var int
 */
protected $mode;

/**
 * The "original insertion mode" index, or null when none is saved
 * @var int|null
 */
protected $originalMode;

/**
 * The insertion mode sets this to true to acknowledge the tag's
 * self-closing flag. startTag() reports an error when a self-closing
 * tag is left unacknowledged.
 * @var bool
 */
public $ack;

/**
 * The stack of template insertion modes
 * @var TemplateModeStack
 */
public $templateModeStack;
/**
 * Store the tree builder and create an empty stack of template insertion
 * modes. The insertion mode handlers themselves are created later, in
 * startDocument().
 *
 * @param TreeBuilder $builder
 */
public function __construct( TreeBuilder $builder ) {
    $this->builder = $builder;
    $this->templateModeStack = new TemplateModeStack();
}
/**
 * Switch the insertion mode, and return the new handler
 *
 * @param int $mode One of the insertion mode index constants
 * @return InsertionMode
 */
public function switchMode( $mode ) {
    // Keep the mode index in step with the handler: isInTableMode() and
    // switchAndSave() both read $this->mode.
    $this->mode = $mode;
    return $this->handler = $this->dispatchTable[$mode];
}
/**
 * Let the original insertion mode be the current insertion mode, and
 * switch the insertion mode to some new value. Return the new handler.
 *
 * The saved mode can be reinstated with restoreMode().
 *
 * @param int $mode One of the insertion mode index constants
 * @return InsertionMode
 */
public function switchAndSave( $mode ) {
    $this->originalMode = $this->mode;
    // Record the new index as well as the handler, so that the mode index
    // and the active handler never disagree.
    $this->mode = $mode;
    return $this->handler = $this->dispatchTable[$mode];
}
/**
 * Switch the insertion mode to the original insertion mode and return the
 * new handler. The saved original mode is cleared, so each save can be
 * restored only once.
 *
 * @return InsertionMode
 * @throws TreeBuilderError If no original insertion mode has been saved
 */
public function restoreMode() {
    if ( $this->originalMode === null ) {
        throw new TreeBuilderError( "original insertion mode is not set" );
    }
    $mode = $this->mode = $this->originalMode;
    $this->originalMode = null;
    return $this->handler = $this->dispatchTable[$mode];
}
/**
 * Get the handler for the current insertion mode in HTML content.
 * This is used by the "in foreign" handler to execute the HTML insertion
 * mode. It does not necessarily correspond to the handler currently being
 * executed, since tokens may be routed to the foreign content handler
 * instead.
 *
 * @return InsertionMode
 */
public function getHandler() {
    return $this->handler;
}
/**
 * True if we are in a table mode, for the purposes of switching to
 * IN_SELECT_IN_TABLE as opposed to IN_SELECT.
 *
 * @return bool
 */
public function isInTableMode() {
    // Keyed by mode index so that membership is a single isset() lookup.
    static $tableModes = [
        self::IN_TABLE => true,
        self::IN_CAPTION => true,
        self::IN_TABLE_BODY => true,
        self::IN_ROW => true,
        self::IN_CELL => true,
    ];
    return isset( $tableModes[$this->mode] );
}
/**
 * Reset the insertion mode appropriately, and return the new handler.
 *
 * @return InsertionMode
 */
public function reset() {
    return $this->switchMode( $this->getAppropriateMode() );
}
/**
 * Get the insertion mode index which is switched to when we reset the
 * insertion mode appropriately.
 *
 * The stack of open elements is walked from the current node down to the
 * bottom. The first element whose name selects a mode decides the result;
 * if none does, the result is IN_BODY.
 *
 * @return int
 */
protected function getAppropriateMode() {
    $builder = $this->builder;
    $stack = $builder->stack;
    // Becomes true once the bottom entry of the stack is being examined.
    $last = false;
    for ( $idx = $stack->length() - 1; $idx >= 0; $idx-- ) {
        $node = $stack->item( $idx );
        if ( $idx === 0 ) {
            $last = true;
            if ( $builder->isFragment ) {
                // In the fragment case the context element stands in for
                // the bottom entry of the stack.
                $node = $builder->fragmentContext;
            }
        }

        switch ( $node->htmlName ) {
            case 'select':
                if ( $last ) {
                    return self::IN_SELECT;
                }
                // Look further down the stack: a table ancestor selects
                // the in-table variant unless a template comes first.
                for ( $ancestorIdx = $idx - 1; $ancestorIdx >= 1; $ancestorIdx-- ) {
                    $ancestor = $stack->item( $ancestorIdx );
                    if ( $ancestor->htmlName === 'template' ) {
                        return self::IN_SELECT;
                    } elseif ( $ancestor->htmlName === 'table' ) {
                        return self::IN_SELECT_IN_TABLE;
                    }
                }
                return self::IN_SELECT;

            case 'td':
            case 'th':
                if ( !$last ) {
                    return self::IN_CELL;
                }
                // A cell at the bottom of the stack does not select a
                // mode; the loop then ends and IN_BODY is returned.
                break;

            case 'tr':
                return self::IN_ROW;

            case 'tbody':
            case 'thead':
            case 'tfoot':
                return self::IN_TABLE_BODY;

            case 'caption':
                return self::IN_CAPTION;

            case 'colgroup':
                return self::IN_COLUMN_GROUP;

            case 'table':
                return self::IN_TABLE;

            case 'template':
                return $this->templateModeStack->current;

            case 'head':
                if ( $last ) {
                    return self::IN_BODY;
                } else {
                    return self::IN_HEAD;
                }

            case 'body':
                return self::IN_BODY;

            case 'frameset':
                return self::IN_FRAMESET;

            case 'html':
                if ( $builder->headElement === null ) {
                    return self::BEFORE_HEAD;
                } else {
                    return self::AFTER_HEAD;
                }
        }
    }

    return self::IN_BODY;
}
/**
 * If the stack of open elements is empty, return null, otherwise return
 * the adjusted current node.
 *
 * The adjusted current node is the fragment context element when the
 * current node is the bottom entry of the stack (stackIndex 0) and the
 * builder is in fragment mode. Otherwise it is the stack's current node,
 * returned unchanged.
 */
protected function dispatcherCurrentNode() {
    $current = $this->builder->stack->current;
    if ( $current && $current->stackIndex === 0 && $this->builder->isFragment ) {
        return $this->builder->fragmentContext;
    }
    return $current;
}
/**
 * Create the insertion mode handlers, enter the INITIAL mode and start
 * the document on the tree builder.
 *
 * When $namespace is not null (presumably the fragment case, in which
 * $namespace and $name describe the context element -- confirm against
 * TreeBuilder), the insertion mode is reset appropriately. An HTML
 * template context first pushes IN_TEMPLATE onto the stack of template
 * insertion modes.
 */
public function startDocument( Tokenizer $tokenizer, $namespace, $name ) {
    $this->dispatchTable = [];
    foreach ( self::$handlerClasses as $mode => $class ) {
        $this->dispatchTable[$mode] = new $class( $this->builder, $this );
    }

    $this->inHead = $this->dispatchTable[self::IN_HEAD];
    $this->inBody = $this->dispatchTable[self::IN_BODY];
    $this->inTable = $this->dispatchTable[self::IN_TABLE];
    $this->inSelect = $this->dispatchTable[self::IN_SELECT];
    $this->inTemplate = $this->dispatchTable[self::IN_TEMPLATE];
    $this->inForeign = $this->dispatchTable[self::IN_FOREIGN_CONTENT];

    $this->switchMode( self::INITIAL );

    $this->builder->startDocument( $tokenizer, $namespace, $name );
    if ( $namespace !== null ) {
        if ( $namespace === HTMLData::NS_HTML && $name === 'template' ) {
            $this->templateModeStack->push( self::IN_TEMPLATE );
        }
        // Choose the insertion mode that matches the context element
        // instead of staying in INITIAL.
        $this->reset();
    }
}
/**
 * Pass the end of the document to the current handler, then drop every
 * reference this object holds to the insertion mode objects.
 */
public function endDocument( $pos ) {
    $this->handler->endDocument( $pos );

    // All references to insertion modes must be explicitly released, since
    // they have a circular reference back to $this
    $this->dispatchTable = [];
    $this->handler = null;
    $this->inHead = null;
    $this->inBody = null;
    $this->inTable = null;
    $this->inSelect = null;
    $this->inTemplate = null;
    $this->inForeign = null;
}
/**
 * Forward an error to the tree builder unchanged.
 */
public function error( $text, $pos ) {
    $this->builder->error( $text, $pos );
}
/**
 * Dispatch a character token.
 *
 * The current HTML insertion mode handles it when there is no adjusted
 * current node, or when that node is in the HTML namespace, is a MathML
 * text integration point, or is an HTML integration point. Otherwise the
 * foreign content handler receives it.
 */
public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
    $current = $this->dispatcherCurrentNode();
    if ( !$current
        || $current->namespace === HTMLData::NS_HTML
        || $current->isMathmlTextIntegration()
        || $current->isHtmlIntegration()
    ) {
        $this->handler->characters( $text, $start, $length, $sourceStart, $sourceLength );
    } else {
        $this->inForeign->characters(
            $text, $start, $length, $sourceStart, $sourceLength );
    }
}
/**
 * Dispatch a start tag token, then report an error if the tag was
 * self-closing and no handler acknowledged that by setting $this->ack.
 *
 * The current HTML insertion mode handles the tag when any of these hold;
 * otherwise the foreign content handler receives it:
 *   - there is no adjusted current node
 *   - the adjusted current node is in the HTML namespace
 *   - it is a MathML text integration point and the tag is neither
 *     mglyph nor malignmark
 *   - it is a MathML annotation-xml element and the tag is svg
 *   - it is an HTML integration point
 */
public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
    // Clear any acknowledgement left over from a previous tag, so that the
    // check at the end reflects this tag only.
    $this->ack = false;
    $current = $this->dispatcherCurrentNode();
    if ( !$current
        || $current->namespace === HTMLData::NS_HTML
        || ( $current->isMathmlTextIntegration()
            && $name !== 'mglyph'
            && $name !== 'malignmark'
        )
        || ( $name === 'svg'
            && $current->namespace === HTMLData::NS_MATHML
            && $current->name === 'annotation-xml'
        )
        || $current->isHtmlIntegration()
    ) {
        $this->handler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
    } else {
        $this->inForeign->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
    }
    if ( $selfClose && !$this->ack ) {
        $this->builder->error( "unacknowledged self-closing tag", $sourceStart );
    }
}
/**
 * Dispatch an end tag token. The current HTML insertion mode handles it
 * when there is no adjusted current node or that node is in the HTML
 * namespace; otherwise the foreign content handler receives it.
 */
public function endTag( $name, $sourceStart, $sourceLength ) {
    $current = $this->dispatcherCurrentNode();
    if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
        $this->handler->endTag( $name, $sourceStart, $sourceLength );
    } else {
        $this->inForeign->endTag( $name, $sourceStart, $sourceLength );
    }
}
/**
 * Dispatch a doctype token. The current HTML insertion mode handles it
 * when there is no adjusted current node or that node is in the HTML
 * namespace; otherwise the foreign content handler receives it.
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
    $current = $this->dispatcherCurrentNode();
    if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
        $this->handler->doctype( $name, $public, $system, $quirks,
            $sourceStart, $sourceLength );
    } else {
        $this->inForeign->doctype( $name, $public, $system, $quirks,
            $sourceStart, $sourceLength );
    }
}
/**
 * Dispatch a comment token. The current HTML insertion mode handles it
 * when there is no adjusted current node or that node is in the HTML
 * namespace; otherwise the foreign content handler receives it.
 */
public function comment( $text, $sourceStart, $sourceLength ) {
    $current = $this->dispatcherCurrentNode();
    if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
        $this->handler->comment( $text, $sourceStart, $sourceLength );
    } else {
        $this->inForeign->comment( $text, $sourceStart, $sourceLength );
    }
}