<?php
/**
 * Recovered from a gitweb "blobdiff_plain" dump (new file, blob 5445c1ef) of
 * vendor/wikimedia/remex-html/RemexHtml/Serializer/HtmlFormatter.php:
 * https://scripts.mit.edu/gitweb/autoinstallsdev/mediawiki.git/blobdiff_plain/19e297c21b10b1b8a3acad5e73fc71dcb35db44a..6932310fd58ebef145fa01eb76edf7150284d8ea:/vendor/wikimedia/remex-html/RemexHtml/Serializer/HtmlFormatter.php
 *
 * The dump had been run through an HTML tag stripper, which removed every
 * "<...>" run from the code: the file header, the entity values in the
 * escape tables, the closing tags and the comment/doctype/CDATA/PI markup.
 * Those pieces have been restored below.
 *
 * NOTE(review): the namespace, the use statements and the "implements"
 * clause were not present in the dump; they are inferred from the file path
 * and from the names referenced in the body. Confirm against upstream.
 */

namespace RemexHtml\Serializer;

use RemexHtml\DOM\DOMFormatter;
use RemexHtml\DOM\DOMUtils;
use RemexHtml\HTMLData;

/**
 * A formatter which serializes a node tree, or a DOM tree, as HTML text.
 */
class HtmlFormatter implements Formatter, DOMFormatter {
	/**
	 * The elements for which a closing tag is omitted.
	 */
	protected $voidElements = [
		'area' => true,
		'base' => true,
		'basefont' => true,
		'bgsound' => true,
		'br' => true,
		'col' => true,
		'embed' => true,
		'frame' => true,
		'hr' => true,
		'img' => true,
		'input' => true,
		'keygen' => true,
		'link' => true,
		'menuitem' => true,
		'meta' => true,
		'param' => true,
		'source' => true,
		'track' => true,
		'wbr' => true,
	];

	/**
	 * The elements which need a leading newline in their contents to be
	 * duplicated, since the parser strips a leading newline.
	 */
	protected $prefixLfElements = [
		'pre' => true,
		'textarea' => true,
		'listing' => true
	];

	/**
	 * The elements which have unescaped contents. The constructor adds
	 * 'noscript' to this list when the scripting flag is enabled.
	 */
	protected $rawTextElements = [
		'style' => true,
		'script' => true,
		'xmp' => true,
		'iframe' => true,
		'noembed' => true,
		'noframes' => true,
		'plaintext' => true,
	];

	/**
	 * The escape table for attribute values. The key "\xc2\xa0" is the
	 * UTF-8 encoding of U+00A0 (no-break space).
	 */
	protected $attributeEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'"' => '&quot;',
	];

	/**
	 * The escape table for text nodes
	 */
	protected $textEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'<' => '&lt;',
		'>' => '&gt;',
	];

	/**
	 * Attribute namespaces which have unqualified local names
	 */
	protected $unqualifiedNamespaces = [
		HTMLData::NS_HTML => true,
		HTMLData::NS_MATHML => true,
		HTMLData::NS_SVG => true,
	];

	/** Whether the doctype found in a DOM document is emitted as-is */
	protected $useSourceDoctype;

	/** Whether element/attribute names are passed through DOMUtils::uncoerceName() */
	protected $reverseCoercion;

	/**
	 * Constructor.
	 *
	 * @param array $options An associative array of options:
	 *   - scriptingFlag : Set this to false to disable scripting. True by default.
	 *   - useSourceDoctype : Emit the doctype used in the source. If this is
	 *     false or absent, an HTML doctype will be used.
	 *   - reverseCoercion : When formatting a DOM node, reverse the encoding
	 *     of invalid names. False by default.
	 */
	public function __construct( $options = [] ) {
		// Array union keeps caller-supplied keys and fills in the missing ones
		$options += [
			'scriptingFlag' => true,
			'useSourceDoctype' => false,
			'reverseCoercion' => false,
		];
		if ( $options['scriptingFlag'] ) {
			// With scripting enabled, <noscript> contents are emitted unescaped
			$this->rawTextElements['noscript'] = true;
		}
		$this->useSourceDoctype = $options['useSourceDoctype'];
		$this->reverseCoercion = $options['reverseCoercion'];
	}

	/**
	 * Get the text emitted at the start of the output.
	 *
	 * @param string|null $fragmentNamespace Unused by this formatter
	 * @param string|null $fragmentName Unused by this formatter
	 * @return string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return "<!DOCTYPE html>";
	}

	/**
	 * Serialize a run of character data.
	 *
	 * @param SerializerNode $parent The element containing the text
	 * @param string $text A buffer holding the text
	 * @param int $start Byte offset of the run within $text
	 * @param int $length Byte length of the run
	 * @return string The run, escaped unless $parent is an HTML raw text element
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$text = substr( $text, $start, $length );
		if ( $parent->namespace !== HTMLData::NS_HTML
			|| !isset( $this->rawTextElements[$parent->name] )
		) {
			$text = strtr( $text, $this->textEscapes );
		}
		return $text;
	}

	/**
	 * Serialize an element whose children have already been serialized.
	 *
	 * @param SerializerNode $parent The parent of $node (unused here)
	 * @param SerializerNode $node The element to serialize
	 * @param string|null $contents The serialized children
	 * @return string
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$name = $node->name;
		$s = "<$name";
		foreach ( $node->attrs->getValues() as $attrName => $attrValue ) {
			$encValue = strtr( $attrValue, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		if ( $node->namespace === HTMLData::NS_HTML ) {
			if ( isset( $contents[0] ) && $contents[0] === "\n"
				&& isset( $this->prefixLfElements[$name] )
			) {
				// Double the leading newline so that it survives re-parsing
				$s .= "\n$contents</$name>";
			} elseif ( !isset( $this->voidElements[$name] ) ) {
				$s .= "$contents</$name>";
			}
			// HTML void elements get neither contents nor a closing tag
		} else {
			$s .= "$contents</$name>";
		}
		return $s;
	}

	/**
	 * Serialize a comment.
	 *
	 * @param SerializerNode $parent The parent of the comment (unused here)
	 * @param string $text The comment text, emitted without escaping
	 * @return string
	 */
	public function comment( SerializerNode $parent, $text ) {
		return "<!--$text-->";
	}

	/**
	 * Serialize a doctype token. This formatter emits nothing for it; the
	 * doctype text comes from startDocument().
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @return string Always the empty string
	 */
	public function doctype( $name, $public, $system ) {
		return '';
	}

	/**
	 * Recursively serialize a DOM node and its descendants.
	 *
	 * @param \DOMNode $node
	 * @return string The serialization; empty for unhandled node types
	 */
	public function formatDOMNode( \DOMNode $node ) {
		// Children are serialized first so each case only has to wrap them
		$contents = '';
		if ( $node->firstChild ) {
			foreach ( $node->childNodes as $child ) {
				$contents .= $this->formatDOMNode( $child );
			}
		}

		switch ( $node->nodeType ) {
			case XML_ELEMENT_NODE:
				return $this->formatDOMElement( $node, $contents );

			case XML_DOCUMENT_NODE:
				if ( !$this->useSourceDoctype ) {
					return "<!DOCTYPE html>" . $contents;
				} else {
					// The doctype child node, if any, is already in $contents
					return $contents;
				}

			case XML_DOCUMENT_FRAG_NODE:
				return $contents;

			case XML_TEXT_NODE:
				$text = $node->data;
				$parent = $node->parentNode;
				// A detached text node has no parent and is escaped normally
				$isRawText = $parent !== null
					&& $parent->namespaceURI === HTMLData::NS_HTML
					&& isset( $this->rawTextElements[$parent->nodeName] );
				if ( !$isRawText ) {
					$text = strtr( $text, $this->textEscapes );
				}
				return $text;

			case XML_CDATA_SECTION_NODE:
				$parent = $node->parentNode;
				if ( $parent !== null && $parent->namespaceURI === HTMLData::NS_HTML ) {
					// CDATA is not allowed in HTML nodes
					return $node->data;
				} else {
					return "<![CDATA[{$node->data}]]>";
				}

			case XML_PI_NODE:
				return "<?{$node->target} {$node->data}>";

			case XML_COMMENT_NODE:
				return "<!--{$node->data}-->";

			case XML_DOCUMENT_TYPE_NODE:
				if ( $this->useSourceDoctype ) {
					return "<!DOCTYPE {$node->name}>";
				} else {
					// The document node case emits the HTML doctype instead
					return '';
				}

			default:
				return '';
		}
	}

	/**
	 * Serialize a DOM element whose children have already been serialized.
	 *
	 * @param \DOMElement $node The element to serialize
	 * @param string $contents The serialized children
	 * @return string
	 */
	public function formatDOMElement( \DOMElement $node, $contents ) {
		$ns = $node->namespaceURI;
		if ( $ns === null
			|| isset( $this->unqualifiedNamespaces[$ns] )
			|| $node->prefix === null
		) {
			$name = $node->localName;
		} else {
			$name = $node->prefix . ':' . $node->localName;
		}
		if ( $this->reverseCoercion ) {
			$name = DOMUtils::uncoerceName( $name );
		}

		$s = '<' . $name;
		foreach ( $node->attributes as $attr ) {
			switch ( $attr->namespaceURI ) {
				case HTMLData::NS_XML:
					$attrName = 'xml:' . $attr->localName;
					break;
				case HTMLData::NS_XMLNS:
					if ( $attr->localName === 'xmlns' ) {
						$attrName = 'xmlns';
					} else {
						$attrName = 'xmlns:' . $attr->localName;
					}
					break;
				case HTMLData::NS_XLINK:
					$attrName = 'xlink:' . $attr->localName;
					break;
				default:
					// Compare explicitly: strlen( null ) is deprecated in PHP 8.1
					if ( $attr->prefix !== null && $attr->prefix !== '' ) {
						$attrName = $attr->prefix . ':' . $attr->localName;
					} else {
						$attrName = $attr->localName;
					}
			}
			if ( $this->reverseCoercion ) {
				$attrName = DOMUtils::uncoerceName( $attrName );
			}
			$encValue = strtr( $attr->value, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		if ( $ns === HTMLData::NS_HTML ) {
			if ( isset( $contents[0] ) && $contents[0] === "\n"
				&& isset( $this->prefixLfElements[$name] )
			) {
				// Double the leading newline so that it survives re-parsing
				$s .= "\n$contents</$name>";
			} elseif ( !isset( $this->voidElements[$name] ) ) {
				$s .= "$contents</$name>";
			}
			// HTML void elements get neither contents nor a closing tag
		} else {
			$s .= "$contents</$name>";
		}
		return $s;
	}
}