3 namespace RemexHtml\Serializer;
4 use RemexHtml\PropGuard;
5 use RemexHtml\TreeBuilder\TreeBuilder;
6 use RemexHtml\TreeBuilder\TreeHandler;
7 use RemexHtml\TreeBuilder\Element;
8 use RemexHtml\Tokenizer\Attributes;
9 use RemexHtml\Tokenizer\PlainAttributes;
/**
 * A TreeHandler which builds a serialized representation of a document, by
 * encoding elements when the end tags are seen. This is faster than building
 * a DOM and then serializing it, even if you use DOMDocument::saveHTML().
 */
16 class Serializer implements AbstractSerializer {
/**
 * A node corresponding to the Document
 *
 * @var SerializerNode|null
 */
private $root;

/**
 * Optional callable handed to the constructor; error() invokes it with the
 * text and position of each reported parse error.
 *
 * @var callable|null
 */
private $errorCallback;

/**
 * The Formatter implementation
 *
 * @var Formatter
 */
private $formatter;

/**
 * All active SerializerNode objects in an array, so that they can be
 * referred to by integer indexes. This is a way to emulate weak references,
 * to avoid circular references, allowing nodes to be freed.
 *
 * @var SerializerNode[]
 */
private $nodes = [];

/**
 * The next key into $nodes which will be created
 *
 * @var int
 */
private $nextNodeId = 0;

/**
 * True if we are parsing a fragment. The children of the <html> element
 * will be serialized, instead of the whole document.
 *
 * @var bool
 */
private $isFragment;

/**
 * The output string: seeded by startDocument(), extended by doctype() and
 * completed by endDocument().
 *
 * @var string
 */
private $result = '';
/**
 * Set up a serializer which produces its output through the given formatter.
 *
 * @param Formatter $formatter
 * @param callable|null $errorCallback A function which is called with the
 *   details of each parse error
 */
public function __construct( Formatter $formatter, $errorCallback = null ) {
	$this->errorCallback = $errorCallback;
	$this->formatter = $formatter;
}
/**
 * Magic setter: PHP calls this for writes to properties that are not
 * accessible from the calling scope. Every such write is delegated to
 * PropGuard::set().
 *
 * NOTE(review): presumably PropGuard rejects writes to undeclared
 * properties -- confirm against PropGuard.
 *
 * @param string $name The property name
 * @param mixed $value The value being assigned
 */
public function __set( $name, $value ) {
	PropGuard::set( $this, $name, $value );
}
/**
 * Get the final string. This can only be called after endDocument() is received.
 *
 * @return string
 */
public function getResult() {
	return $this->result;
}
/**
 * Get the root SerializerNode.
 *
 * @return SerializerNode
 */
public function getRootNode() {
	return $this->root;
}
/**
 * Look up the parent of a SerializerNode through its parentId.
 *
 * @param SerializerNode $node
 * @return SerializerNode
 */
public function getParentNode( SerializerNode $node ) {
	$parentId = $node->parentId;
	return $this->nodes[$parentId];
}
/**
 * Get the last child of a given SerializerNode
 *
 * @param SerializerNode $node
 * @return SerializerNode|string|null The final entry of the node's child
 *   list, or null when the node has no children
 */
public function getLastChild( SerializerNode $node ) {
	$children = $node->children;
	$lastChildIndex = count( $children ) - 1;
	$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
	return $lastChild;
}
/**
 * Reset the serializer state and begin a new document or fragment.
 *
 * A fresh root node with ID 0 is created and registered as the only
 * active node, and the result string is seeded with whatever the
 * formatter's startDocument() returns.
 *
 * @param mixed $fragmentNamespace Null for a whole document; any non-null
 *   value switches the serializer into fragment mode
 * @param mixed $fragmentName Passed through to the formatter
 */
public function startDocument( $fragmentNamespace, $fragmentName ) {
	$root = new SerializerNode( 0, 0, '', '', new PlainAttributes, false );
	$this->root = $root;
	$this->nodes = [ $root ];
	$this->nextNodeId = 1;
	$this->isFragment = ( $fragmentNamespace !== null );
	$this->result = $this->formatter->startDocument( $fragmentNamespace, $fragmentName );
}
/**
 * Finish the document: serialize everything that is still held as a node
 * and append it to the result string, then release the node tree.
 *
 * In fragment mode the children of the root's first child are emitted,
 * otherwise the children of the root itself.
 *
 * @param int $pos Unused by this handler
 */
public function endDocument( $pos ) {
	if ( $this->isFragment ) {
		// The first child of the root is the <html> element; only its
		// contents belong to the fragment.
		$root = $this->root->children[0];
	} else {
		$root = $this->root;
	}
	foreach ( $root->children as $child ) {
		if ( is_string( $child ) ) {
			// Already serialized
			$this->result .= $child;
		} else {
			$this->result .= $this->stringify( $root, $child );
		}
	}
	// Drop all node references so that the tree can be freed
	$this->root = null;
	$this->nodes = [];
	$this->nextNodeId = 0;
}
/**
 * Translate a TreeBuilder placement into a pair of serializer nodes.
 *
 * @param int $preposition One of the TreeBuilder placement constants
 * @param Element|SerializerNode|null $refElement The reference element;
 *   not inspected when the preposition is TreeBuilder::ROOT
 * @return array A two-element list [ parent node, reference node ]:
 *   - ROOT: [ root, null ]
 *   - BEFORE: [ parent of the reference node, reference node ]
 *   - anything else: [ reference node, reference node ]
 * @throws SerializerError If $refElement is neither an Element nor a
 *   SerializerNode
 */
protected function interpretPlacement( $preposition, $refElement ) {
	if ( $preposition === TreeBuilder::ROOT ) {
		return [ $this->root, null ];
	}

	if ( $refElement instanceof Element ) {
		$refNode = $refElement->userData;
	} elseif ( $refElement instanceof SerializerNode ) {
		$refNode = $refElement;
	} else {
		throw new SerializerError( "Invalid type of ref element" );
	}

	if ( $preposition === TreeBuilder::BEFORE ) {
		return [ $this->nodes[$refNode->parentId], $refNode ];
	}
	return [ $refNode, $refNode ];
}
/**
 * Encode a run of text through the formatter and attach it to the tree.
 *
 * With TreeBuilder::BEFORE the text is placed immediately before the
 * reference node, which must currently be the last child of its parent.
 * Otherwise the text is appended to the parent's children, merging into a
 * trailing string child when there is one.
 *
 * @param int $preposition One of the TreeBuilder placement constants
 * @param Element|SerializerNode|null $refElement
 * @param string $text Passed to Formatter::characters() together with
 *   $start and $length
 * @param int $start
 * @param int $length
 * @param int $sourceStart Unused by this handler
 * @param int $sourceLength Unused by this handler
 * @throws SerializerError If an insert-before is requested and the
 *   reference node is not the last child of its parent
 */
public function characters( $preposition, $refElement, $text, $start, $length,
	$sourceStart, $sourceLength
) {
	list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
	$encoded = (string)$this->formatter->characters( $parent, $text, $start, $length );

	$children =& $parent->children;
	$lastChildIndex = count( $children ) - 1;
	$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

	if ( $preposition === TreeBuilder::BEFORE ) {
		// Insert before element
		if ( $lastChild !== $refNode ) {
			$refIndex = array_search( $refNode, $children, true );
			throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
		}
		// The text takes over the last slot and the reference node moves up by one
		$children[$lastChildIndex] = $encoded;
		$children[$lastChildIndex + 1] = $refNode;
	} else {
		// Append to the list of children
		if ( is_string( $lastChild ) ) {
			$children[$lastChildIndex] .= $encoded;
		} else {
			$children[] = $encoded;
		}
	}
}
/**
 * Insert an element, or move an element which was inserted earlier.
 *
 * An element whose userData is already set is treated as a reparenting
 * operation: its slot in the old parent is blanked and its parent pointer
 * is updated. Otherwise a new SerializerNode is created, registered under
 * the next free ID and stored in the element's userData.
 *
 * @param int $preposition One of the TreeBuilder placement constants
 * @param Element|SerializerNode|null $refElement
 * @param Element $element
 * @param bool $void Passed to the SerializerNode constructor for new nodes
 * @param int $sourceStart Unused by this handler
 * @param int $sourceLength Unused by this handler
 * @throws SerializerError If a node to be reparented cannot be found in its
 *   old parent, or if an insert-before is requested and the reference node
 *   is not the last child of its parent
 */
public function insertElement( $preposition, $refElement, Element $element, $void,
	$sourceStart, $sourceLength
) {
	list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
	$children =& $parent->children;
	$lastChildIndex = count( $children ) - 1;
	$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

	if ( $element->userData ) {
		// This element has already been inserted, this is a reparenting operation
		$self = $element->userData;
		$oldParent = $this->nodes[$self->parentId];
		$oldChildren =& $oldParent->children;
		$oldChildIndex = array_search( $self, $oldChildren, true );
		if ( $oldChildIndex === false ) {
			throw new SerializerError( "cannot find node to reparent: " .
				$element->getDebugTag() );
		}
		// Remove from the old parent, update parent pointer. The slot is
		// blanked rather than removed, so sibling indexes stay stable.
		$oldChildren[$oldChildIndex] = '';
		$self->parentId = $parent->id;
	} else {
		// Inserting an element which has not been seen before
		$id = $this->nextNodeId++;
		$self = new SerializerNode( $id, $parent->id, $element->namespace,
			$element->name, $element->attrs, $void );
		$this->nodes[$id] = $element->userData = $self;
	}

	if ( $preposition === TreeBuilder::BEFORE ) {
		// Insert before element
		if ( $lastChild !== $refNode ) {
			$refIndex = array_search( $refNode, $children, true );
			throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
		}
		$children[$lastChildIndex] = $self;
		$children[$lastChildIndex + 1] = $refNode;
	} else {
		// Append to the list of children
		$children[] = $self;
	}
}
/**
 * Handle the end of an element: serialize its node, replace the node in
 * its parent's child list with the resulting string, and drop it from the
 * active node list.
 *
 * Requests for <head>, for virtual elements, and for elements which cannot
 * be located in the tree are ignored.
 *
 * @param Element $element
 * @param int $sourceStart Unused by this handler
 * @param int $sourceLength Unused by this handler
 */
public function endTag( Element $element, $sourceStart, $sourceLength ) {
	if ( $element->htmlName === 'head' || $element->isVirtual ) {
		// <head> elements are immortal
		return;
	}
	$self = $element->userData;
	if ( !( $self instanceof SerializerNode ) || !isset( $this->nodes[$self->parentId] ) ) {
		// The element was never inserted, or its parent is no longer an
		// active node. There is nothing to serialize, so treat this like
		// any other request to end a non-existent element.
		return;
	}
	$parent = $this->nodes[$self->parentId];
	$children =& $parent->children;
	// Search backwards, since the element being closed is usually near the end
	for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
		if ( $children[$index] === $self ) {
			unset( $this->nodes[$self->id] );
			$children[$index] = $this->stringify( $parent, $self );
			return;
		}
	}
	// Ignore requests to end non-existent elements (this happens sometimes)
}
/**
 * Serialize a specific node
 *
 * String children are concatenated as they are; node children are
 * serialized recursively. The assembled contents are handed to the
 * formatter's element() method along with the parent and the node.
 *
 * @param SerializerNode $parent The parent of $node
 * @param SerializerNode $node The node to serialize
 * @return string
 */
private function stringify( SerializerNode $parent, SerializerNode $node ) {
	if ( $node->void ) {
		// A void element has no contents; null tells the formatter so
		$contents = null;
	} else {
		$contents = '';
		foreach ( $node->children as $child ) {
			if ( is_string( $child ) ) {
				$contents .= $child;
			} else {
				$contents .= $this->stringify( $node, $child );
			}
		}
	}
	return $this->formatter->element( $parent, $node, $contents );
}
/**
 * Append the formatter's rendering of the doctype to the result string.
 *
 * @param string $name
 * @param string $public
 * @param string $system
 * @param mixed $quirks Unused by this handler
 * @param int $sourceStart Unused by this handler
 * @param int $sourceLength Unused by this handler
 */
public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
	$formatted = $this->formatter->doctype( $name, $public, $system );
	$this->result .= $formatted;
}
/**
 * Encode a comment through the formatter and attach it to the tree.
 *
 * Placement follows the same rules as characters(): with
 * TreeBuilder::BEFORE the comment goes immediately before the reference
 * node, which must be the last child of its parent; otherwise it is
 * appended, merging into a trailing string child when there is one.
 *
 * @param int $preposition One of the TreeBuilder placement constants
 * @param Element|SerializerNode|null $refElement
 * @param string $text Passed to Formatter::comment()
 * @param int $sourceStart Unused by this handler
 * @param int $sourceLength Unused by this handler
 * @throws SerializerError If an insert-before is requested and the
 *   reference node is not the last child of its parent
 */
public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
	list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
	// Cast as characters() does: the child list may only hold strings and
	// SerializerNode objects, so a non-string formatter result must not be
	// stored as it is.
	$encoded = (string)$this->formatter->comment( $parent, $text );
	$children =& $parent->children;
	$lastChildIndex = count( $children ) - 1;
	$lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;

	if ( $preposition === TreeBuilder::BEFORE ) {
		// Insert before element
		if ( $lastChild !== $refNode ) {
			throw new SerializerError( "invalid insert position" );
		}
		$children[$lastChildIndex] = $encoded;
		$children[$lastChildIndex + 1] = $refNode;
	} else {
		// Append to the list of children
		if ( is_string( $lastChild ) ) {
			$children[$lastChildIndex] .= $encoded;
		} else {
			$children[] = $encoded;
		}
	}
}
/**
 * Forward a parse error to the error callback, if one was supplied to the
 * constructor. Without a callback the error is dropped.
 *
 * @param string $text
 * @param int $pos
 */
public function error( $text, $pos ) {
	$callback = $this->errorCallback;
	if ( !$callback ) {
		return;
	}
	call_user_func( $callback, $text, $pos );
}
/**
 * Merge additional attributes into an element, and point the element's
 * serializer node (if it has one) at the merged attribute set.
 *
 * @param Element $element
 * @param Attributes $attrs The attributes to merge in
 * @param int $sourceStart Unused by this handler
 */
public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
	$element->attrs->merge( $attrs );
	$node = $element->userData;
	if ( $node instanceof SerializerNode ) {
		$node->attrs = $element->attrs;
	}
}
/**
 * Remove an element from its parent. The element's slot in the parent's
 * child list is replaced with an empty string, so that the indexes of its
 * siblings do not change.
 *
 * @param Element $element
 * @param int $sourceStart Unused by this handler
 * @throws SerializerError If the element's node is not among the children
 *   of its recorded parent
 */
public function removeNode( Element $element, $sourceStart ) {
	$self = $element->userData;
	$parent = $this->nodes[$self->parentId];
	$children =& $parent->children;
	for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
		if ( $children[$index] === $self ) {
			$children[$index] = '';
			return;
		}
	}
	throw new SerializerError( "cannot find element to remove" );
}
/**
 * Move all children of $element into $newParent, and insert $newParent as
 * the only child of $element.
 *
 * The child list is detached first, $newParent is then inserted under the
 * now-empty element, and finally the detached list is handed to
 * $newParent with the parent pointer of every node child updated. String
 * children are already serialized and carry no parent pointer.
 *
 * @param Element $element The element whose children are moved
 * @param Element $newParent The element which receives the children
 * @param int $sourceStart Passed on to insertElement()
 */
public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
	$oldNode = $element->userData;
	$moved = $oldNode->children;
	$oldNode->children = [];

	$this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );

	$target = $newParent->userData;
	$targetId = $target->id;
	foreach ( $moved as $child ) {
		if ( is_object( $child ) ) {
			$child->parentId = $targetId;
		}
	}
	$target->children = $moved;
}
/**
 * Get a text representation of the current state of the serializer, for
 * debugging.
 *
 * The root node is serialized as if it were an ordinary element, then the
 * first two and last three bytes are cut off and a newline is appended.
 * NOTE(review): presumably the cut-off bytes are the start and end tags the
 * formatter emits for the empty-named root -- confirm against the
 * Formatter in use.
 *
 * @return string
 */
public function dump() {
	$serialized = $this->stringify( $this->root, $this->root );
	$inner = substr( $serialized, 2, -3 );
	return $inner . "\n";
}