--- /dev/null
+<?php
+
+namespace RemexHtml\Serializer;
+use RemexHtml\PropGuard;
+use RemexHtml\TreeBuilder\TreeBuilder;
+use RemexHtml\TreeBuilder\TreeHandler;
+use RemexHtml\TreeBuilder\Element;
+use RemexHtml\Tokenizer\Attributes;
+use RemexHtml\Tokenizer\PlainAttributes;
+
+/**
+ * A TreeHandler which builds a serialized representation of a document, by
+ * encoding elements when the end tags are seen. This is faster than building
+ * a DOM and then serializing it, even if you use DOMDocument::saveHTML().
+ */
+class Serializer implements AbstractSerializer {
+ /**
+ * A node corresponding to the Document
+ * @var SerializerNode
+ */
+ private $root;
+
+ /**
+ * The error callback
+ */
+ private $errorCallback;
+
+ /**
+ * The Formatter implementation
+ *
+ * @var Formatter
+ */
+ private $formatter;
+
+ /**
+ * All active SerializerNode objects in an array, so that they can be
+ * referred to by integer indexes. This is a way to emulate weak references,
+ * to avoid circular references, allowing nodes to be freed.
+ *
+ * @var SerializerNode[integer]
+ */
+ private $nodes = [];
+
+ /**
+ * The next key into $nodes which will be created
+ */
+ private $nextNodeId = 0;
+
+ /**
+ * True if we are parsing a fragment. The children of the <html> element
+ * will be serialized, instead of the whole document.
+ */
+ private $isFragment;
+
+ /**
+ * The result string
+ */
+ private $result = '';
+
+ /**
+ * Constructor
+ *
+ * @param Formatter $formatter
+ * @param callable|null $errorCallback A function which is called with the
+ * details of each parse error
+ */
+ public function __construct( Formatter $formatter, $errorCallback = null ) {
+ $this->formatter = $formatter;
+ $this->errorCallback = $errorCallback;
+ }
+
+ public function __set( $name, $value ) {
+ PropGuard::set( $this, $name, $value );
+ }
+
+ /**
+ * Get the final string. This can only be called after endDocument() is received.
+ */
+ public function getResult() {
+ return $this->result;
+ }
+
+ /**
+ * Get the root SerializerNode.
+ *
+ * @return SerializerNode
+ */
+ public function getRootNode() {
+ return $this->root;
+ }
+
+ /**
+ * Get the parent SerializerNode of a given SerializerNode
+ *
+ * @param SerializerNode $node
+ * @return SerializerNode
+ */
+ public function getParentNode( SerializerNode $node ) {
+ return $this->nodes[$node->parentId];
+ }
+
+ /**
+ * Get the last child of a given SerializerNode
+ *
+ * @param SerializerNode $node
+ * @return SerializerNode|string|null
+ */
+ public function getLastChild( SerializerNode $node ) {
+ $children = $node->children;
+ $lastChildIndex = count( $children ) - 1;
+ $lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
+ return $lastChild;
+ }
+
+ public function startDocument( $fragmentNamespace, $fragmentName ) {
+ $this->root = new SerializerNode( 0, 0, '', '', new PlainAttributes, false );
+ $this->nodes = [ $this->root ];
+ $this->nextNodeId = 1;
+ $this->isFragment = $fragmentNamespace !== null;
+ $this->result = $this->formatter->startDocument( $fragmentNamespace, $fragmentName );
+ }
+
+ public function endDocument( $pos ) {
+ if ( $this->isFragment ) {
+ $root = $this->root->children[0];
+ } else {
+ $root = $this->root;
+ }
+ foreach ( $root->children as $childIndex => $child ) {
+ if ( is_string( $child ) ) {
+ $this->result .= $child;
+ } else {
+ $this->result .= $this->stringify( $root, $child );
+ }
+ }
+ $this->root = null;
+ $this->nodes = [];
+ $this->nextNodeId = 0;
+ }
+
+ protected function interpretPlacement( $preposition, $refElement ) {
+ if ( $preposition === TreeBuilder::ROOT ) {
+ return [ $this->root, null ];
+ }
+ if ( $refElement instanceof Element ) {
+ $refNode = $refElement->userData;
+ } elseif ( $refElement instanceof SerializerNode ) {
+ $refNode = $refElement;
+ } else {
+ throw new SerializerError( "Invalid type of ref element" );
+ }
+ if ( $preposition === TreeBuilder::BEFORE ) {
+ return [ $this->nodes[$refNode->parentId], $refNode ];
+ } else {
+ return [ $refNode, $refNode ];
+ }
+ }
+
+ public function characters( $preposition, $refElement, $text, $start, $length,
+ $sourceStart, $sourceLength
+ ) {
+ list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
+ $encoded = (string)$this->formatter->characters( $parent, $text, $start, $length );
+
+ $children =& $parent->children;
+ $lastChildIndex = count( $children ) - 1;
+ $lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
+
+ if ( $preposition === TreeBuilder::BEFORE ) {
+ // Insert before element
+ if ( $lastChild !== $refNode ) {
+ $refIndex = array_search( $refNode, $children, true );
+ throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
+ }
+ $children[$lastChildIndex] = $encoded;
+ $children[$lastChildIndex + 1] = $refNode;
+ } else {
+ // Append to the list of children
+ if ( is_string( $lastChild ) ) {
+ $children[$lastChildIndex] .= $encoded;
+ } else {
+ $children[] = $encoded;
+ }
+ }
+ }
+
+ /**
+ * Insert an element
+ *
+ * @param integer $preposition
+ * @param Element|SerializerNode|null $refElement
+ * @param Element $element
+ * @param bool $void
+ * @param integer $sourceStart
+ * @param integer $sourceLength
+ */
+ public function insertElement( $preposition, $refElement, Element $element, $void,
+ $sourceStart, $sourceLength
+ ) {
+ list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
+ $children =& $parent->children;
+ $lastChildIndex = count( $children ) - 1;
+ $lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
+
+ if ( $element->userData ) {
+ // This element has already been inserted, this is a reparenting operation
+ $self = $element->userData;
+ $oldParent = $this->nodes[$self->parentId];
+ $oldChildren =& $oldParent->children;
+ $oldChildIndex = array_search( $self, $oldChildren, true );
+ if ( $oldChildIndex === false ) {
+ throw new SerializerError( "cannot find node to reparent: " .
+ $element->getDebugTag() );
+ }
+ // Remove from the old parent, update parent pointer
+ $oldChildren[$oldChildIndex] = '';
+ $self->parentId = $parent->id;
+ } else {
+ // Inserting an element which has not been seen before
+ $id = $this->nextNodeId++;
+ $self = new SerializerNode( $id, $parent->id, $element->namespace,
+ $element->name, $element->attrs, $void );
+ $this->nodes[$id] = $element->userData = $self;
+ }
+
+ if ( $preposition === TreeBuilder::BEFORE ) {
+ // Insert before element
+ if ( $lastChild !== $refNode ) {
+ $refIndex = array_search( $refNode, $children, true );
+ throw new SerializerError( "invalid insert position $refIndex/$lastChildIndex" );
+ }
+ $children[$lastChildIndex] = $self;
+ $children[$lastChildIndex + 1] = $refNode;
+ } else {
+ // Append to the list of children
+ $children[] = $self;
+ }
+ }
+
+ public function endTag( Element $element, $sourceStart, $sourceLength ) {
+ if ( $element->htmlName === 'head' || $element->isVirtual ) {
+ // <head> elements are immortal
+ return;
+ }
+ $self = $element->userData;
+ $parent = $this->nodes[$self->parentId];
+ $children =& $parent->children;
+ for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
+ if ( $children[$index] === $self ) {
+ unset( $this->nodes[$self->id] );
+ $children[$index] = $this->stringify( $parent, $self );
+ return;
+ }
+ }
+ // Ignore requests to end non-existent elements (this happens sometimes)
+ }
+
+ /**
+ * Serialize a specific node
+ *
+ * @param SerializerNode $parent The parent of $node
+ * @param SerializerNode $node The node to serialize
+ * @return string
+ */
+ private function stringify( SerializerNode $parent, SerializerNode $node ) {
+ if ( $node->void ) {
+ $contents = null;
+ } else {
+ $contents = '';
+ foreach ( $node->children as $childIndex => $child ) {
+ if ( is_string( $child ) ) {
+ $contents .= $child;
+ } else {
+ $contents .= $this->stringify( $node, $child );
+ }
+ }
+ }
+ return $this->formatter->element( $parent, $node, $contents );
+ }
+
+ public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
+ $this->result .= $this->formatter->doctype( $name, $public, $system );
+ }
+
+ public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
+ list( $parent, $refNode ) = $this->interpretPlacement( $preposition, $refElement );
+ $encoded = $this->formatter->comment( $parent, $text );
+ $children =& $parent->children;
+ $lastChildIndex = count( $children ) - 1;
+ $lastChild = $lastChildIndex >= 0 ? $children[$lastChildIndex] : null;
+
+ if ( $preposition === TreeBuilder::BEFORE ) {
+ // Insert before element
+ if ( $lastChild !== $refNode ) {
+ throw new SerializerError( "invalid insert position" );
+ }
+ $children[$lastChildIndex] = $encoded;
+ $children[$lastChildIndex + 1] = $refNode;
+ } else {
+ // Append to the list of children
+ if ( is_string( $lastChild ) ) {
+ $children[$lastChildIndex] .= $encoded;
+ } else {
+ $children[] = $encoded;
+ }
+ }
+ }
+
+ public function error( $text, $pos ) {
+ if ( $this->errorCallback ) {
+ call_user_func( $this->errorCallback, $text, $pos );
+ }
+ }
+
+ public function mergeAttributes( Element $element, Attributes $attrs, $sourceStart ) {
+ $element->attrs->merge( $attrs );
+ if ( $element->userData instanceof SerializerNode ) {
+ $element->userData->attrs = $element->attrs;
+ }
+ }
+
+ public function removeNode( Element $element, $sourceStart ) {
+ $self = $element->userData;
+ $parent = $this->nodes[$self->parentId];
+ $children =& $parent->children;
+ for ( $index = count( $children ) - 1; $index >= 0; $index-- ) {
+ if ( $children[$index] === $self ) {
+ $children[$index] = '';
+ return;
+ }
+ }
+ throw new SerializerError( "cannot find element to remove" );
+ }
+
+ public function reparentChildren( Element $element, Element $newParent, $sourceStart ) {
+ $self = $element->userData;
+ $children = $self->children;
+ $self->children = [];
+ $this->insertElement( TreeBuilder::UNDER, $element, $newParent, false, $sourceStart, 0 );
+ $newParentNode = $newParent->userData;
+ $newParentId = $newParentNode->id;
+ foreach ( $children as $child ) {
+ if ( is_object( $child ) ) {
+ $child->parentId = $newParentId;
+ }
+ }
+ $newParentNode->children = $children;
+ }
+
+ /**
+ * Get a text representation of the current state of the serializer, for
+ * debugging.
+ *
+ * @return string
+ */
+ public function dump() {
+ $s = $this->stringify( $this->root, $this->root );
+ return substr( $s, 2, -3 ) . "\n";
+ }
+}