]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - vendor/wikimedia/remex-html/RemexHtml/Serializer/Serializer.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / vendor / wikimedia / remex-html / RemexHtml / Serializer / Serializer.php
1 <?php
2
3 namespace RemexHtml\Serializer;
4 use RemexHtml\PropGuard;
5 use RemexHtml\TreeBuilder\TreeBuilder;
6 use RemexHtml\TreeBuilder\TreeHandler;
7 use RemexHtml\TreeBuilder\Element;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
10
11 /**
12  * A TreeHandler which builds a serialized representation of a document, by
13  * encoding elements when the end tags are seen. This is faster than building
14  * a DOM and then serializing it, even if you use DOMDocument::saveHTML().
15  */
class Serializer implements AbstractSerializer {
	/**
	 * A node corresponding to the Document. Null until startDocument() is
	 * received and again after endDocument().
	 *
	 * @var SerializerNode|null
	 */
	private $root;

	/**
	 * The error callback, invoked with ( $text, $pos ) for each parse error
	 *
	 * @var callable|null
	 */
	private $errorCallback;

	/**
	 * The Formatter implementation
	 *
	 * @var Formatter
	 */
	private $formatter;

	/**
	 * All active SerializerNode objects in an array, so that they can be
	 * referred to by integer indexes. This is a way to emulate weak references,
	 * to avoid circular references, allowing nodes to be freed.
	 *
	 * @var SerializerNode[]
	 */
	private $nodes = [];

	/**
	 * The next key into $nodes which will be created
	 *
	 * @var int
	 */
	private $nextNodeId = 0;

	/**
	 * True if we are parsing a fragment. The children of the <html> element
	 * will be serialized, instead of the whole document.
	 *
	 * @var bool
	 */
	private $isFragment;

	/**
	 * The result string
	 *
	 * @var string
	 */
	private $result = '';

	/**
	 * Constructor
	 *
	 * @param Formatter $formatter
	 * @param callable|null $errorCallback A function which is called with the
	 *   details of each parse error
	 */
	public function __construct( Formatter $formatter, $errorCallback = null ) {
		$this->formatter = $formatter;
		$this->errorCallback = $errorCallback;
	}

	/**
	 * Route writes to inaccessible or undeclared properties through PropGuard.
	 *
	 * @param string $name
	 * @param mixed $value
	 */
	public function __set( $name, $value ) {
		PropGuard::set( $this, $name, $value );
	}

	/**
	 * Get the final string. This can only be called after endDocument() is received.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->result;
	}

	/**
	 * Get the root SerializerNode.
	 *
	 * @return SerializerNode|null Null outside of startDocument()/endDocument()
	 */
	public function getRootNode() {
		return $this->root;
	}

	/**
	 * Get the parent SerializerNode of a given SerializerNode
	 *
	 * @param SerializerNode $node
	 * @return SerializerNode|null Null if the parent is no longer an active
	 *   node (it has already been serialized and released)
	 */
	public function getParentNode( SerializerNode $node ) {
		// Explicit lookup so that a released parent yields null rather than
		// an "undefined offset" notice.
		return isset( $this->nodes[$node->parentId] )
			? $this->nodes[$node->parentId]
			: null;
	}

	/**
	 * Get the last child of a given SerializerNode
	 *
	 * @param SerializerNode $node
	 * @return SerializerNode|string|null
	 */
	public function getLastChild( SerializerNode $node ) {
		$children = $node->children;
		$lastChildIndex = count( $children ) - 1;
		return $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
	}

	/**
	 * Reset the serializer state, create the root node and start the result
	 * string with whatever the formatter emits for the start of the document.
	 *
	 * @param string|null $fragmentNamespace Non-null selects fragment mode
	 * @param string|null $fragmentName
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		$this->root = new SerializerNode( 0, 0, '', '', new PlainAttributes, false );
		$this->nodes = [ $this->root ];
		$this->nextNodeId = 1;
		$this->isFragment = $fragmentNamespace !== null;
		$this->result = $this->formatter->startDocument( $fragmentNamespace, $fragmentName );
	}

	/**
	 * Serialize everything still pending under the root (or, in fragment
	 * mode, under the first child of the root) onto the result string, then
	 * release all nodes.
	 *
	 * @param int $pos
	 */
	public function endDocument( $pos ) {
		// In fragment mode the children of the first root child are emitted
		// instead of the whole document.
		$container = $this->isFragment ? $this->root->children[0] : $this->root;

		foreach ( $container->children as $child ) {
			$this->result .= is_string( $child )
				? $child
				: $this->stringify( $container, $child );
		}

		$this->root = null;
		$this->nodes = [];
		$this->nextNodeId = 0;
	}

	/**
	 * Translate a TreeBuilder placement into a parent node and a reference node.
	 *
	 * @param int $preposition One of the TreeBuilder placement constants
	 * @param Element|SerializerNode|null $refElement
	 * @return array A two-element list [ parent, refNode ]. For
	 *   TreeBuilder::ROOT the parent is the root node and refNode is null;
	 *   for TreeBuilder::BEFORE the parent is the parent of the reference
	 *   node; otherwise the reference node itself is the parent.
	 * @throws SerializerError If $refElement is of an unsupported type
	 */
	protected function interpretPlacement( $preposition, $refElement ) {
		if ( $preposition === TreeBuilder::ROOT ) {
			return [ $this->root, null ];
		}

		if ( $refElement instanceof Element ) {
			$refNode = $refElement->userData;
		} elseif ( $refElement instanceof SerializerNode ) {
			$refNode = $refElement;
		} else {
			throw new SerializerError( "Invalid type of ref element" );
		}

		if ( $preposition === TreeBuilder::BEFORE ) {
			return [ $this->nodes[$refNode->parentId], $refNode ];
		}
		return [ $refNode, $refNode ];
	}

	/**
	 * Place an already-encoded string into the child list of $parent.
	 *
	 * Shared by characters() and comment(). Insertion before a reference
	 * node is only supported when that node is the last child; appended
	 * strings are merged into a preceding string child.
	 *
	 * @param int $preposition
	 * @param SerializerNode $parent
	 * @param SerializerNode|null $refNode
	 * @param string $encoded
	 * @throws SerializerError If asked to insert before a node which is not
	 *   the last child of $parent
	 */
	private function insertEncoded( $preposition, $parent, $refNode, $encoded ) {
		// Arrays are values in PHP, so take a reference to modify in place
		$children =& $parent->children;
		$lastChildIndex = count( $children ) - 1;
		$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

		if ( $preposition === TreeBuilder::BEFORE ) {
			// Insert before element
			if ( $lastChild !== $refNode ) {
				$refIndex = array_search( $refNode, $children, true );
				throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
			}
			// Put the string in the slot of the reference node and move the
			// reference node one slot further on.
			$children[$lastChildIndex] = $encoded;
			$children[$lastChildIndex + 1] = $refNode;
		} elseif ( is_string( $lastChild ) ) {
			// Append: merge with the preceding string
			$children[$lastChildIndex] .= $encoded;
		} else {
			// Append: start a new string child
			$children[] = $encoded;
		}
	}

	/**
	 * Insert a run of text, encoded by the formatter.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param string $text
	 * @param int $start
	 * @param int $length
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError
	 */
	public function characters( $preposition, $refElement, $text, $start, $length,
		$sourceStart, $sourceLength
	) {
		list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
		$encoded = (string)$this->formatter->characters( $parent, $text, $start, $length );
		$this->insertEncoded( $preposition, $parent, $refNode, $encoded );
	}

	/**
	 * Insert an element
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param Element $element
	 * @param bool $void
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError
	 */
	public function insertElement( $preposition, $refElement, Element $element, $void,
		$sourceStart, $sourceLength
	) {
		list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
		$children =& $parent->children;
		$lastChildIndex = count( $children ) - 1;
		$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

		if ( $element->userData ) {
			// This element has already been inserted, this is a reparenting operation
			$self = $element->userData;
			$oldParent = $this->nodes[$self->parentId];
			$oldChildren =& $oldParent->children;
			$oldChildIndex = array_search( $self, $oldChildren, true );
			if ( $oldChildIndex === false ) {
				throw new SerializerError( "cannot find node to reparent: " .
					$element->getDebugTag() );
			}
			// Remove from the old parent, update parent pointer. The slot is
			// blanked rather than spliced so the other indexes stay put.
			$oldChildren[$oldChildIndex] = '';
			$self->parentId = $parent->id;
		} else {
			// Inserting an element which has not been seen before
			$id = $this->nextNodeId++;
			$self = new SerializerNode( $id, $parent->id, $element->namespace,
				$element->name, $element->attrs, $void );
			$this->nodes[$id] = $element->userData = $self;
		}

		if ( $preposition === TreeBuilder::BEFORE ) {
			// Insert before element
			if ( $lastChild !== $refNode ) {
				$refIndex = array_search( $refNode, $children, true );
				throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
			}
			$children[$lastChildIndex] = $self;
			$children[$lastChildIndex + 1] = $refNode;
		} else {
			// Append to the list of children
			$children[] = $self;
		}
	}

	/**
	 * Serialize an element whose end tag has been seen, replacing its node in
	 * the parent's child list with the resulting string and releasing the node.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function endTag( Element $element, $sourceStart, $sourceLength ) {
		if ( $element->htmlName === 'head' || $element->isVirtual ) {
			// <head> elements are immortal
			return;
		}
		$self = $element->userData;
		if ( !( $self instanceof SerializerNode ) || !isset( $this->nodes[$self->parentId] ) ) {
			// Ignore requests to end non-existent elements (this happens
			// sometimes): either the element was never inserted, or its
			// parent is no longer an active node.
			return;
		}
		$parent = $this->nodes[$self->parentId];
		$children =& $parent->children;
		// Search from the end, since the element being closed is usually
		// one of the most recently added children.
		for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
			if ( $children[$index] === $self ) {
				unset( $this->nodes[$self->id] );
				$children[$index] = $this->stringify( $parent, $self );
				return;
			}
		}
		// Ignore requests to end non-existent elements (this happens sometimes)
	}

	/**
	 * Serialize a specific node
	 *
	 * @param SerializerNode $parent The parent of $node
	 * @param SerializerNode $node The node to serialize
	 * @return string
	 */
	private function stringify( SerializerNode $parent, SerializerNode $node ) {
		if ( $node->void ) {
			// Void elements have no contents at all, signalled by null
			$contents = null;
		} else {
			$contents = '';
			foreach ( $node->children as $child ) {
				$contents .= is_string( $child )
					? $child
					: $this->stringify( $node, $child );
			}
		}
		return $this->formatter->element( $parent, $node, $contents );
	}

	/**
	 * Append the formatter's representation of the doctype to the result.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @param int $quirks
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		$this->result .= $this->formatter->doctype( $name, $public, $system );
	}

	/**
	 * Insert a comment, encoded by the formatter.
	 *
	 * @param int $preposition
	 * @param Element|SerializerNode|null $refElement
	 * @param string $text
	 * @param int $sourceStart
	 * @param int $sourceLength
	 * @throws SerializerError
	 */
	public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
		list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
		// Cast to string, as characters() does, so that a formatter returning
		// null cannot leave a non-string, non-node entry in the child list.
		$encoded = (string)$this->formatter->comment( $parent, $text );
		$this->insertEncoded( $preposition, $parent, $refNode, $encoded );
	}

	/**
	 * Forward a parse error to the error callback, if there is one.
	 *
	 * @param string $text
	 * @param int $pos
	 */
	public function error( $text, $pos ) {
		if ( $this->errorCallback ) {
			call_user_func( $this->errorCallback, $text, $pos );
		}
	}

	/**
	 * Merge additional attributes into an element and keep the attributes of
	 * its SerializerNode, if it has one, pointing at the merged set.
	 *
	 * @param Element $element
	 * @param Attributes $attrs
	 * @param int $sourceStart
	 */
	public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
		$element->attrs->merge( $attrs );
		if ( $element->userData instanceof SerializerNode ) {
			$element->userData->attrs = $element->attrs;
		}
	}

	/**
	 * Remove an element from its parent's child list. The slot is replaced
	 * with an empty string so that the other child indexes are unchanged.
	 *
	 * @param Element $element
	 * @param int $sourceStart
	 * @throws SerializerError If the element is not a child of its parent
	 */
	public function removeNode( Element $element, $sourceStart ) {
		$self = $element->userData;
		$parent = $this->nodes[$self->parentId];
		$children =& $parent->children;
		for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
			if ( $children[$index] === $self ) {
				$children[$index] = '';
				return;
			}
		}
		throw new SerializerError( "cannot find element to remove" );
	}

	/**
	 * Move all children of $element to $newParent, and insert $newParent as
	 * the sole child of $element.
	 *
	 * @param Element $element
	 * @param Element $newParent
	 * @param int $sourceStart
	 */
	public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
		$self = $element->userData;
		// Detach the children first, so that $newParent becomes the only child
		$children = $self->children;
		$self->children = [];
		$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
		$newParentNode = $newParent->userData;
		$newParentId = $newParentNode->id;
		foreach ( $children as $child ) {
			// String children are already serialized and have no parent pointer
			if ( is_object( $child ) ) {
				$child->parentId = $newParentId;
			}
		}
		$newParentNode->children = $children;
	}

	/**
	 * Get a text representation of the current state of the serializer, for
	 * debugging.
	 *
	 * @return string
	 */
	public function dump() {
		$s = $this->stringify( $this->root, $this->root );
		// Drop the first two and last three characters of the serialized
		// root. NOTE(review): presumably these are the "<>" and "</>" emitted
		// for the nameless root node; confirm against the Formatter in use.
		return substr( $s, 2, -3 ) . "\n";
	}
}