true,
		'base' => true,
		'basefont' => true,
		'bgsound' => true,
		'br' => true,
		'col' => true,
		'embed' => true,
		'frame' => true,
		'hr' => true,
		'img' => true,
		'input' => true,
		'keygen' => true,
		'link' => true,
		'menuitem' => true,
		'meta' => true,
		'param' => true,
		'source' => true,
		'track' => true,
		'wbr' => true,
	];

	/**
	 * The elements which need a leading newline in their contents to be
	 * duplicated, since the parser strips a leading newline.
	 */
	protected $prefixLfElements = [
		'pre' => true,
		'textarea' => true,
		'listing' => true,
	];

	/**
	 * The elements which have unescaped contents. The constructor adds
	 * 'noscript' to this list when the scripting flag is enabled.
	 */
	protected $rawTextElements = [
		'style' => true,
		'script' => true,
		'xmp' => true,
		'iframe' => true,
		'noembed' => true,
		'noframes' => true,
		'plaintext' => true,
	];

	/**
	 * The escape table for attribute values, applied with strtr(). Values
	 * are written inside double quotes, so the ampersand, the double quote
	 * and U+00A0 (given here as its UTF-8 byte sequence) are replaced by
	 * character references.
	 */
	protected $attributeEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'"' => '&quot;',
	];

	/**
	 * The escape table for text nodes, applied with strtr(). The ampersand,
	 * U+00A0 (given here as its UTF-8 byte sequence) and the angle brackets
	 * are replaced by character references.
	 */
	protected $textEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'<' => '&lt;',
		'>' => '&gt;',
	];

	/**
	 * Namespaces whose elements are serialized with their unqualified local
	 * name, i.e. without a namespace prefix, when formatting a DOM element.
	 */
	protected $unqualifiedNamespaces = [
		HTMLData::NS_HTML => true,
		HTMLData::NS_MATHML => true,
		HTMLData::NS_SVG => true,
	];

	/** Whether formatDOMNode() emits the doctype found in the source document */
	protected $useSourceDoctype;

	/** Whether coerced element and attribute names are decoded when formatting DOM nodes */
	protected $reverseCoercion;

	/**
	 * Constructor.
	 *
	 * @param array $options An associative array of options:
	 *   - scriptingFlag : Set this to false to disable scripting. True by default.
	 *   - useSourceDoctype : Emit the doctype used in the source. If this is
	 *     false or absent, an HTML doctype will be used.
	 *   - reverseCoercion : When formatting a DOM node, reverse the encoding
	 *     of invalid names. False by default.
*/
	public function __construct( $options = [] ) {
		// Array union keeps every key supplied by the caller and only fills
		// in the ones that are missing.
		$options += [
			'scriptingFlag' => true,
			'useSourceDoctype' => false,
			'reverseCoercion' => false,
		];
		if ( $options['scriptingFlag'] ) {
			// With scripting enabled, noscript contents are emitted unescaped.
			$this->rawTextElements['noscript'] = true;
		}
		$this->useSourceDoctype = $options['useSourceDoctype'];
		$this->reverseCoercion = $options['reverseCoercion'];
	}

	/**
	 * Get the string emitted at the start of the serialized output.
	 *
	 * @param mixed $fragmentNamespace Not used by this formatter
	 * @param mixed $fragmentName Not used by this formatter
	 * @return string The HTML doctype
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return "<!DOCTYPE html>";
	}

	/**
	 * Serialize a run of character data.
	 *
	 * @param SerializerNode $parent The node which contains the text
	 * @param string $text The buffer holding the text
	 * @param int $start The byte offset of the run within $text
	 * @param int $length The byte length of the run
	 * @return string The run, escaped unless the parent is an HTML raw text
	 *   element
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$text = substr( $text, $start, $length );
		$isRawText = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );
		return $isRawText ? $text : strtr( $text, $this->textEscapes );
	}

	/**
	 * Serialize an element: start tag, attributes, contents and end tag.
	 *
	 * @param SerializerNode $parent The parent of the element (unused)
	 * @param SerializerNode $node The element to serialize
	 * @param string $contents The already serialized children
	 * @return string
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$name = $node->name;
		$s = "<$name";
		foreach ( $node->attrs->getValues() as $attrName => $attrValue ) {
			$encValue = strtr( $attrValue, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		if ( $node->namespace === HTMLData::NS_HTML ) {
			if ( isset( $contents[0] ) && $contents[0] === "\n"
				&& isset( $this->prefixLfElements[$name] )
			) {
				// Double the leading newline so that it survives reparsing
				$s .= "\n$contents</$name>";
			} elseif ( !isset( $this->voidElements[$name] ) ) {
				$s .= "$contents</$name>";
			}
			// HTML void elements get neither contents nor an end tag
		} else {
			$s .= "$contents</$name>";
		}
		return $s;
	}

	/**
	 * Serialize a comment.
	 *
	 * @param SerializerNode $parent The parent of the comment (unused)
	 * @param string $text The comment text
	 * @return string
	 */
	public function comment( SerializerNode $parent, $text ) {
		return "<!--$text-->";
	}

	/**
	 * Serialize a doctype token. This formatter emits nothing here.
	 *
	 * @param string $name Unused
	 * @param string $public Unused
	 * @param string $system Unused
	 * @return string Always the empty string
	 */
	public function doctype( $name, $public, $system ) {
		return '';
	}

	/**
	 * Recursively serialize a DOM node and its descendants.
	 *
	 * @param \DOMNode $node
	 * @return string The serialization, or an empty string for node types
	 *   which are not handled
	 */
	public function formatDOMNode( \DOMNode $node ) {
		$contents = '';
		if ( $node->firstChild ) {
			foreach ( $node->childNodes as $child ) {
				$contents .= $this->formatDOMNode( $child );
			}
		}

		switch ( $node->nodeType ) {
			case XML_ELEMENT_NODE:
				return $this->formatDOMElement( $node, $contents );

			case XML_DOCUMENT_NODE:
				if ( !$this->useSourceDoctype ) {
					return "<!DOCTYPE html>" . $contents;
				}
				// The source doctype, if any, is already part of $contents
				// via the XML_DOCUMENT_TYPE_NODE case below.
				return $contents;

			case XML_DOCUMENT_FRAG_NODE:
				return $contents;

			case XML_TEXT_NODE:
				$text = $node->data;
				$parent = $node->parentNode;
				// A text node without a parent is escaped like ordinary text
				$isRawText = $parent !== null
					&& $parent->namespaceURI === HTMLData::NS_HTML
					&& isset( $this->rawTextElements[$parent->nodeName] );
				return $isRawText ? $text : strtr( $text, $this->textEscapes );

			case XML_CDATA_SECTION_NODE:
				$parent = $node->parentNode;
				if ( $parent !== null && $parent->namespaceURI === HTMLData::NS_HTML ) {
					// CDATA is not allowed in HTML nodes
					return $node->data;
				}
				return "<![CDATA[{$node->data}]]>";

			case XML_PI_NODE:
				return "<?{$node->target} {$node->data}>";

			case XML_COMMENT_NODE:
				return "<!--{$node->data}-->";

			case XML_DOCUMENT_TYPE_NODE:
				if ( $this->useSourceDoctype ) {
					return "<!DOCTYPE {$node->name}>";
				}
				return '';

			default:
				return '';
		}
	}

	/**
	 * Serialize a DOM element: start tag, attributes, contents and end tag.
	 *
	 * @param \DOMElement $node The element to serialize
	 * @param string $contents The already serialized children
	 * @return string
	 */
	public function formatDOMElement( \DOMElement $node, $contents ) {
		$ns = $node->namespaceURI;
		// An absent prefix is reported as an empty string rather than null,
		// so both are treated as "no prefix" to avoid emitting ":name".
		$prefix = (string)$node->prefix;
		if ( $ns === null
			|| isset( $this->unqualifiedNamespaces[$ns] )
			|| $prefix === ''
		) {
			$name = $node->localName;
		} else {
			$name = $prefix . ':' . $node->localName;
		}
		if ( $this->reverseCoercion ) {
			$name = DOMUtils::uncoerceName( $name );
		}

		$s = '<' . $name;
		foreach ( $node->attributes as $attr ) {
			switch ( $attr->namespaceURI ) {
				case HTMLData::NS_XML:
					$attrName = 'xml:' . $attr->localName;
					break;
				case HTMLData::NS_XMLNS:
					if ( $attr->localName === 'xmlns' ) {
						$attrName = 'xmlns';
					} else {
						$attrName = 'xmlns:' . $attr->localName;
					}
					break;
				case HTMLData::NS_XLINK:
					$attrName = 'xlink:' . $attr->localName;
					break;
				default:
					$attrPrefix = (string)$attr->prefix;
					if ( $attrPrefix !== '' ) {
						$attrName = $attrPrefix . ':' . $attr->localName;
					} else {
						$attrName = $attr->localName;
					}
			}
			if ( $this->reverseCoercion ) {
				$attrName = DOMUtils::uncoerceName( $attrName );
			}
			$encValue = strtr( $attr->value, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		if ( $ns === HTMLData::NS_HTML ) {
			if ( isset( $contents[0] ) && $contents[0] === "\n"
				&& isset( $this->prefixLfElements[$name] )
			) {
				// Double the leading newline so that it survives reparsing
				$s .= "\n$contents</$name>";
			} elseif ( !isset( $this->voidElements[$name] ) ) {
				$s .= "$contents</$name>";
			}
			// HTML void elements get neither contents nor an end tag
		} else {
			$s .= "$contents</$name>";
		}
		return $s;
	}
}