]> scripts.mit.edu Git - autoinstallsdev/mediawiki.git/blob - vendor/wikimedia/remex-html/RemexHtml/TreeBuilder/Dispatcher.php
MediaWiki 1.30.2
[autoinstallsdev/mediawiki.git] / vendor / wikimedia / remex-html / RemexHtml / TreeBuilder / Dispatcher.php
1 <?php
2
3 namespace RemexHtml\TreeBuilder;
4 use RemexHtml\HTMLData;
5 use RemexHtml\Tokenizer\Attributes;
6 use RemexHtml\Tokenizer\TokenHandler;
7 use RemexHtml\Tokenizer\Tokenizer;
8
9 /**
10  * This is the approximate equivalent of the "tree construction dispatcher" in
11  * the spec. It receives token events and distributes them to the appropriate
12  * insertion mode class. It also implements some things specific to the
13  * dispatcher state:
14  *   - "Reset the insertion mode appropriately"
15  *   - The stack of template insertion modes
16  *   - The "original insertion mode"
17  */
18 class Dispatcher implements TokenHandler {
        /**
         * The insertion mode indexes.
         *
         * Every value below appears as a key of self::$handlerClasses, and is
         * the kind of value passed to switchMode() and switchAndSave().
         */
        const INITIAL = 1;
        const BEFORE_HTML = 2;
        const BEFORE_HEAD = 3;
        const IN_HEAD = 4;
        const IN_HEAD_NOSCRIPT = 5;
        const AFTER_HEAD = 6;
        const IN_BODY = 7;
        const TEXT = 8;
        const IN_TABLE = 9;
        const IN_TABLE_TEXT = 10;
        const IN_CAPTION = 11;
        const IN_COLUMN_GROUP = 12;
        const IN_TABLE_BODY = 13;
        const IN_ROW = 14;
        const IN_CELL = 15;
        const IN_SELECT = 16;
        const IN_SELECT_IN_TABLE = 17;
        const IN_TEMPLATE = 18;
        const AFTER_BODY = 19;
        const IN_FRAMESET = 20;
        const AFTER_FRAMESET = 21;
        const AFTER_AFTER_BODY = 22;
        const AFTER_AFTER_FRAMESET = 23;
        const IN_FOREIGN_CONTENT = 24;
        const IN_PRE = 25;
        const IN_TEXTAREA = 26;
48
        /**
         * The handler class for each insertion mode, keyed by insertion mode
         * index. startDocument() creates one instance of every class listed
         * here, passing it the TreeBuilder and this dispatcher.
         *
         * @var string[]
         */
        protected static $handlerClasses = [
                self::INITIAL => Initial::class,
                self::BEFORE_HTML => BeforeHtml::class,
                self::BEFORE_HEAD => BeforeHead::class,
                self::IN_HEAD => InHead::class,
                self::IN_HEAD_NOSCRIPT => InHeadNoscript::class,
                self::AFTER_HEAD => AfterHead::class,
                self::IN_BODY => InBody::class,
                self::TEXT => Text::class,
                self::IN_TABLE => InTable::class,
                self::IN_TABLE_TEXT => InTableText::class,
                self::IN_CAPTION => InCaption::class,
                self::IN_COLUMN_GROUP => InColumnGroup::class,
                self::IN_TABLE_BODY => InTableBody::class,
                self::IN_ROW => InRow::class,
                self::IN_CELL => InCell::class,
                self::IN_SELECT => InSelect::class,
                self::IN_SELECT_IN_TABLE => InSelectInTable::class,
                self::IN_TEMPLATE => InTemplate::class,
                self::AFTER_BODY => AfterBody::class,
                self::IN_FRAMESET => InFrameset::class,
                self::AFTER_FRAMESET => AfterFrameset::class,
                self::AFTER_AFTER_BODY => AfterAfterBody::class,
                self::AFTER_AFTER_FRAMESET => AfterAfterFrameset::class,
                self::IN_FOREIGN_CONTENT => InForeignContent::class,
                self::IN_PRE => InPre::class,
                self::IN_TEXTAREA => InTextarea::class,
        ];
80
        // Public shortcuts for "using the rules for" actions. Each one is set by
        // startDocument() to the matching dispatch table entry and is reset to
        // null by endDocument().
        public $inHead;
        public $inBody;
        public $inTable;
        public $inSelect;
        public $inTemplate;
        public $inForeign;

        /** @var TreeBuilder */
        protected $builder;

        /**
         * The InsertionMode object for the current insertion mode in HTML content.
         * Null once endDocument() has run.
         * @var InsertionMode|null
         */
        protected $handler;

        /**
         * An array mapping insertion mode indexes to InsertionMode objects.
         * Populated by startDocument() and emptied by endDocument().
         * @var InsertionMode[]
         */
        protected $dispatchTable;

        /**
         * The insertion mode index
         * @var int
         */
        protected $mode;

        /**
         * The "original insertion mode" index. Null when no mode is saved;
         * restoreMode() throws in that case.
         * @var int|null
         */
        protected $originalMode;

        /**
         * The insertion mode sets this to true to acknowledge the tag's
         * self-closing flag. startTag() sets it to false before dispatching,
         * and raises an error afterwards if a self-closing tag was not
         * acknowledged.
         * @var bool
         */
        public $ack;

        /**
         * The stack of template insertion modes
         * @var TemplateModeStack
         */
        public $templateModeStack;
123
124         /**
125          * @param TreeBuilder $builder
126          */
127         public function __construct( TreeBuilder $builder ) {
128                 $this->builder = $builder;
129                 $this->templateModeStack = new TemplateModeStack;
130         }
131
132         /**
133          * Switch the insertion mode, and return the new handler
134          *
135          * @param integer $mode
136          * @return InsertionMode
137          */
138         public function switchMode( $mode ) {
139                 $this->mode = $mode;
140                 return $this->handler = $this->dispatchTable[$mode];
141         }
142
143         /**
144          * Let the original insertion mode be the current insertion mode, and
145          * switch the insertion mode to some new value. Return the new handler.
146          *
147          * @param integer $mode
148          * @return InsertionMode
149          */
150         public function switchAndSave( $mode ) {
151                 $this->originalMode = $this->mode;
152                 $this->mode = $mode;
153                 return $this->handler = $this->dispatchTable[$mode];
154         }
155
156         /**
157          * Switch the insertion mode to the original insertion mode and return the
158          * new handler.
159          *
160          * @return InsertionMode
161          */
162         public function restoreMode() {
163                 if ( $this->originalMode === null ) {
164                         throw new TreeBuilderError( "original insertion mode is not set" );
165                 }
166                 $mode = $this->mode = $this->originalMode;
167                 $this->originalMode = null;
168                 return $this->handler = $this->dispatchTable[$mode];
169         }
170
171         /**
172          * Get the handler for the current insertion mode in HTML content.
173          * This is used by the "in foreign" handler to execute the HTML insertion
174          * mode. It does not necessarily correspond to the handler currently being
175          * executed.
176          *
177          * @return InsertionMode
178          */
179         public function getHandler() {
180                 return $this->handler;
181         }
182
183         /**
184          * True if we are in a table mode, for the purposes of switching to
185          * IN_SELECT_IN_TABLE as opposed to IN_SELECT.
186          *
187          * @return bool
188          */
189         public function isInTableMode() {
190                 static $tableModes = [
191                         self::IN_TABLE => true,
192                         self::IN_CAPTION => true,
193                         self::IN_TABLE_BODY => true,
194                         self::IN_ROW => true,
195                         self::IN_CELL => true ];
196                 return isset( $tableModes[$this->mode] );
197         }
198
199         /**
200          * Reset the insertion mode appropriately, and return the new handler.
201          *
202          * @return InsertionMode
203          */
204         public function reset() {
205                 return $this->switchMode( $this->getAppropriateMode() );
206         }
207
208         /**
209          * Get the insertion mode index which is switched to when we reset the
210          * insertion mode appropriately.
211          *
212          * @return integer
213          */
214         protected function getAppropriateMode() {
215                 $builder = $this->builder;
216                 $stack = $builder->stack;
217                 $last = false;
218                 $node = $stack->current;
219                 for ( $idx = $stack->length() - 1; $idx >= 0; $idx-- ) {
220                         $node = $stack->item( $idx );
221                         if ( $idx === 0 ) {
222                                 $last = true;
223                                 if ( $builder->isFragment ) {
224                                         $node = $builder->fragmentContext;
225                                 }
226                         }
227
228                         switch ( $node->htmlName ) {
229                         case 'select':
230                                 if ( $last ) {
231                                         return self::IN_SELECT;
232                                 }
233                                 for ( $ancestorIdx = $idx - 1; $ancestorIdx >= 1; $ancestorIdx-- ) {
234                                         $ancestor = $stack->item( $ancestorIdx );
235                                         if ( $ancestor->htmlName === 'template' ) {
236                                                 return self::IN_SELECT;
237                                         } elseif ( $ancestor->htmlName === 'table' ) {
238                                                 return self::IN_SELECT_IN_TABLE;
239                                         }
240                                 }
241                                 return self::IN_SELECT;
242
243                         case 'td':
244                         case 'th':
245                                 if ( !$last ) {
246                                         return self::IN_CELL;
247                                 }
248                                 break;
249
250                         case 'tr':
251                                 return self::IN_ROW;
252
253                         case 'tbody':
254                         case 'thead':
255                         case 'tfoot':
256                                 return self::IN_TABLE_BODY;
257
258                         case 'caption':
259                                 return self::IN_CAPTION;
260
261                         case 'colgroup':
262                                 return self::IN_COLUMN_GROUP;
263
264                         case 'table':
265                                 return self::IN_TABLE;
266
267                         case 'template':
268                                 return $this->templateModeStack->current;
269
270                         case 'head':
271                                 if ( $last ) {
272                                         return self::IN_BODY;
273                                 } else {
274                                         return self::IN_HEAD;
275                                 }
276
277                         case 'body':
278                                 return self::IN_BODY;
279
280                         case 'frameset':
281                                 return self::IN_FRAMESET;
282
283                         case 'html':
284                                 if ( $builder->headElement === null ) {
285                                         return self::BEFORE_HEAD;
286                                 } else {
287                                         return self::AFTER_HEAD;
288                                 }
289                         }
290                 }
291
292                 return self::IN_BODY;
293         }
294
295         /**
296          * If the stack of open elements is empty, return null, otherwise return
297          * the adjusted current node.
298          */
299         protected function dispatcherCurrentNode() {
300                 $current = $this->builder->stack->current;
301                 if ( $current && $current->stackIndex === 0 && $this->builder->isFragment ) {
302                         return $this->builder->fragmentContext;
303                 } else {
304                         return $current;
305                 }
306         }
307
308         public function startDocument( Tokenizer $tokenizer, $namespace, $name ) {
309                 $this->dispatchTable = [];
310                 foreach ( self::$handlerClasses as $mode => $class ) {
311                         $this->dispatchTable[$mode] = new $class( $this->builder, $this );
312                 }
313
314                 $this->inHead = $this->dispatchTable[self::IN_HEAD];
315                 $this->inBody = $this->dispatchTable[self::IN_BODY];
316                 $this->inTable = $this->dispatchTable[self::IN_TABLE];
317                 $this->inSelect = $this->dispatchTable[self::IN_SELECT];
318                 $this->inTemplate = $this->dispatchTable[self::IN_TEMPLATE];
319                 $this->inForeign = $this->dispatchTable[self::IN_FOREIGN_CONTENT];
320
321                 $this->switchMode( self::INITIAL );
322
323                 $this->builder->startDocument( $tokenizer, $namespace, $name );
324                 if ( $namespace !== null ) {
325                         if ( $namespace === HTMLData::NS_HTML && $name === 'template' ) {
326                                 $this->templateModeStack->push( self::IN_TEMPLATE );
327                         }
328                         $this->reset();
329                 }
330         }
331
332         public function endDocument( $pos ) {
333                 $this->handler->endDocument( $pos );
334
335                 // All references to insertion modes must be explicitly released, since
336                 // they have a circular reference back to $this
337                 $this->dispatchTable = [];
338                 $this->handler = null;
339                 $this->inHead = null;
340                 $this->inBody = null;
341                 $this->inTable = null;
342                 $this->inSelect = null;
343                 $this->inTemplate = null;
344                 $this->inForeign = null;
345         }
346
347         public function error( $text, $pos ) {
348                 $this->builder->error( $text, $pos );
349         }
350
351         public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
352                 $current = $this->dispatcherCurrentNode();
353                 if ( !$current
354                         || $current->namespace === HTMLData::NS_HTML
355                         || $current->isMathmlTextIntegration()
356                         || $current->isHtmlIntegration()
357                 ) {
358                         $this->handler->characters( $text, $start, $length, $sourceStart, $sourceLength );
359                 } else {
360                         $this->inForeign->characters(
361                                 $text, $start, $length, $sourceStart, $sourceLength );
362                 }
363         }
364
365         public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
366                 $this->ack = false;
367                 $current = $this->dispatcherCurrentNode();
368                 if ( !$current
369                         || $current->namespace === HTMLData::NS_HTML
370                         || ( $current->isMathmlTextIntegration()
371                                 && $name !== 'mglyph'
372                                 && $name !== 'malignmark'
373                         )
374                         || ( $name === 'svg'
375                                 && $current->namespace === HTMLData::NS_MATHML
376                                 && $current->name === 'annotation-xml'
377                         )
378                         || $current->isHtmlIntegration()
379                 ) {
380                         $this->handler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
381                 } else {
382                         $this->inForeign->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
383                 }
384                 if ( $selfClose && !$this->ack ) {
385                         $this->builder->error( "unacknowledged self-closing tag", $sourceStart );
386                 }
387         }
388
389         public function endTag( $name, $sourceStart, $sourceLength ) {
390                 $current = $this->dispatcherCurrentNode();
391                 if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
392                         $this->handler->endTag( $name, $sourceStart, $sourceLength );
393                 } else {
394                         $this->inForeign->endTag( $name, $sourceStart, $sourceLength );
395                 }
396         }
397
398         public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
399                 $current = $this->dispatcherCurrentNode();
400                 if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
401                         $this->handler->doctype( $name, $public, $system, $quirks,
402                                 $sourceStart, $sourceLength );
403                 } else {
404                         $this->inForeign->doctype( $name, $public, $system, $quirks,
405                                 $sourceStart, $sourceLength );
406                 }
407         }
408
409         public function comment( $text, $sourceStart, $sourceLength ) {
410                 $current = $this->dispatcherCurrentNode();
411                 if ( !$current || $current->namespace === HTMLData::NS_HTML ) {
412                         $this->handler->comment( $text, $sourceStart, $sourceLength );
413                 } else {
414                         $this->inForeign->comment( $text, $sourceStart, $sourceLength );
415                 }
416         }
417 }