]> scripts.mit.edu Git - autoinstalls/mediawiki.git/blob - vendor/wikimedia/remex-html/RemexHtml/Serializer/HtmlFormatter.php
MediaWiki 1.30.2
[autoinstalls/mediawiki.git] / vendor / wikimedia / remex-html / RemexHtml / Serializer / HtmlFormatter.php
1 <?php
2
3 namespace RemexHtml\Serializer;
4 use RemexHtml\HTMLData;
5 use RemexHtml\DOM\DOMUtils;
6 use RemexHtml\DOM\DOMFormatter;
7
/**
 * A formatter which follows the HTML 5 fragment serialization algorithm.
 *
 * The class implements both Formatter and DOMFormatter: it can turn
 * SerializerNode based events into HTML strings, and it can recursively
 * serialize a PHP DOM tree via formatDOMNode().
 */
class HtmlFormatter implements Formatter, DOMFormatter {
	/**
	 * The elements for which a closing tag is omitted.
	 * Keys are element names; only the presence of the key is tested.
	 */
	protected $voidElements = [
		'area' => true,
		'base' => true,
		'basefont' => true,
		'bgsound' => true,
		'br' => true,
		'col' => true,
		'embed' => true,
		'frame' => true,
		'hr' => true,
		'img' => true,
		'input' => true,
		'keygen' => true,
		'link' => true,
		'menuitem' => true,
		'meta' => true,
		'param' => true,
		'source' => true,
		'track' => true,
		'wbr' => true,
	];

	/**
	 * The elements which need a leading newline in their contents to be
	 * duplicated, since the parser strips a leading newline.
	 */
	protected $prefixLfElements = [
		'pre' => true,
		'textarea' => true,
		'listing' => true
	];

	/**
	 * The elements which have unescaped contents.
	 * The constructor adds 'noscript' to this list when scripting is enabled.
	 */
	protected $rawTextElements = [
		'style' => true,
		'script' => true,
		'xmp' => true,
		'iframe' => true,
		'noembed' => true,
		'noframes' => true,
		'plaintext' => true,
	];

	/**
	 * The escape table for attribute values, in the form accepted by strtr().
	 * "\xc2\xa0" is the UTF-8 encoding of U+00A0 (no-break space).
	 */
	protected $attributeEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'"' => '&quot;',
	];

	/**
	 * The escape table for text nodes, in the form accepted by strtr().
	 */
	protected $textEscapes = [
		'&' => '&amp;',
		"\xc2\xa0" => '&nbsp;',
		'<' => '&lt;',
		'>' => '&gt;',
	];

	/**
	 * Attribute namespaces which have unqualified local names
	 */
	protected $unqualifiedNamespaces = [
		HTMLData::NS_HTML => true,
		HTMLData::NS_MATHML => true,
		HTMLData::NS_SVG => true,
	];

	/** Value of the useSourceDoctype constructor option */
	protected $useSourceDoctype;

	/** Value of the reverseCoercion constructor option */
	protected $reverseCoercion;

	/**
	 * Constructor.
	 *
	 * @param array $options An associative array of options:
	 *   - scriptingFlag : Set this to false to disable scripting. True by default.
	 *     When true, the contents of <noscript> are emitted unescaped.
	 *   - useSourceDoctype : Emit the doctype used in the source. If this is
	 *     false or absent, an HTML doctype will be used. In this class the
	 *     option is consulted by formatDOMNode() only.
	 *   - reverseCoercion : When formatting a DOM node, reverse the encoding
	 *     of invalid names. False by default.
	 */
	public function __construct( $options = [] ) {
		// The array union keeps caller-supplied values and fills in the rest
		$options += [
			'scriptingFlag' => true,
			'useSourceDoctype' => false,
			'reverseCoercion' => false,
		];
		if ( $options['scriptingFlag'] ) {
			$this->rawTextElements['noscript'] = true;
		}
		$this->useSourceDoctype = $options['useSourceDoctype'];
		$this->reverseCoercion = $options['reverseCoercion'];
	}

	/**
	 * Get the string emitted at the start of the output.
	 *
	 * Both parameters are ignored by this implementation; the HTML doctype
	 * is always returned.
	 *
	 * @param string|null $fragmentNamespace
	 * @param string|null $fragmentName
	 * @return string
	 */
	public function startDocument( $fragmentNamespace, $fragmentName ) {
		return "<!DOCTYPE html>";
	}

	/**
	 * Serialize a run of character data.
	 *
	 * @param SerializerNode $parent The node which contains the text
	 * @param string $text A buffer containing the text
	 * @param int $start The byte offset of the run within $text
	 * @param int $length The byte length of the run
	 * @return string The text, escaped with $textEscapes unless the parent
	 *   is an HTML element listed in $rawTextElements
	 */
	public function characters( SerializerNode $parent, $text, $start, $length ) {
		$text = substr( $text, $start, $length );
		$isRawText = $parent->namespace === HTMLData::NS_HTML
			&& isset( $this->rawTextElements[$parent->name] );
		if ( !$isRawText ) {
			$text = strtr( $text, $this->textEscapes );
		}
		return $text;
	}

	/**
	 * Serialize an element, given its already serialized contents.
	 *
	 * @param SerializerNode $parent The parent of $node (unused here)
	 * @param SerializerNode $node The element to serialize
	 * @param string|null $contents The serialized child nodes
	 * @return string
	 */
	public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
		$name = $node->name;
		$s = "<$name";
		foreach ( $node->attrs->getValues() as $attrName => $attrValue ) {
			$encValue = strtr( $attrValue, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		$s .= $this->formatElementTail(
			$node->namespace === HTMLData::NS_HTML, $name, $contents );
		return $s;
	}

	/**
	 * Serialize a comment. The text is emitted without any escaping.
	 *
	 * @param SerializerNode $parent
	 * @param string $text
	 * @return string
	 */
	public function comment( SerializerNode $parent, $text ) {
		return "<!--$text-->";
	}

	/**
	 * Serialize a doctype token. This implementation always returns an empty
	 * string; the doctype is emitted by startDocument() instead.
	 *
	 * @param string $name
	 * @param string $public
	 * @param string $system
	 * @return string
	 */
	public function doctype( $name, $public, $system ) {
		return '';
	}

	/**
	 * Recursively serialize a DOM node and its descendants.
	 *
	 * @param \DOMNode $node
	 * @return string The serialization; an empty string for node types
	 *   which are not handled
	 */
	public function formatDOMNode( \DOMNode $node ) {
		// Serialize the children first; container node types wrap the result
		$contents = '';
		if ( $node->firstChild ) {
			foreach ( $node->childNodes as $child ) {
				$contents .= $this->formatDOMNode( $child );
			}
		}

		switch ( $node->nodeType ) {
		case XML_ELEMENT_NODE:
			return $this->formatDOMElement( $node, $contents );

		case XML_DOCUMENT_NODE:
			if ( !$this->useSourceDoctype ) {
				return "<!DOCTYPE html>" . $contents;
			} else {
				// Any doctype child has already been serialized into $contents
				return $contents;
			}

		case XML_DOCUMENT_FRAG_NODE:
			return $contents;

		case XML_TEXT_NODE:
			$text = $node->data;
			$parent = $node->parentNode;
			if ( $parent->namespaceURI !== HTMLData::NS_HTML
				|| !isset( $this->rawTextElements[$parent->nodeName] )
			) {
				$text = strtr( $text, $this->textEscapes );
			}
			return $text;

		case XML_CDATA_SECTION_NODE:
			$parent = $node->parentNode;
			if ( $parent->namespaceURI === HTMLData::NS_HTML ) {
				// CDATA is not allowed in HTML nodes, so the data is
				// emitted as is, without the CDATA wrapper
				return $node->data;
			} else {
				return "<![CDATA[{$node->data}]]>";
			}

		case XML_PI_NODE:
			return "<?{$node->target} {$node->data}>";

		case XML_COMMENT_NODE:
			return "<!--{$node->data}-->";

		case XML_DOCUMENT_TYPE_NODE:
			if ( $this->useSourceDoctype ) {
				return "<!DOCTYPE {$node->name}>";
			} else {
				// The document node case supplies the HTML doctype
				return '';
			}

		default:
			return '';
		}
	}

	/**
	 * Serialize a DOM element, given its already serialized contents.
	 *
	 * @param \DOMElement $node
	 * @param string $contents The serialized child nodes
	 * @return string
	 */
	public function formatDOMElement( \DOMElement $node, $contents ) {
		$ns = $node->namespaceURI;
		// DOMNode::$prefix is an empty string, not null, when the element
		// has no prefix. Comparing against null only would produce a name
		// like ":foo" for an unprefixed element in a foreign namespace.
		$prefix = (string)$node->prefix;
		if ( $ns === null
			|| isset( $this->unqualifiedNamespaces[$ns] )
			|| $prefix === ''
		) {
			$name = $node->localName;
		} else {
			$name = $prefix . ':' . $node->localName;
		}
		if ( $this->reverseCoercion ) {
			$name = DOMUtils::uncoerceName( $name );
		}

		$s = '<' . $name;
		foreach ( $node->attributes as $attr ) {
			switch ( $attr->namespaceURI ) {
			case HTMLData::NS_XML:
				$attrName = 'xml:' . $attr->localName;
				break;
			case HTMLData::NS_XMLNS:
				if ( $attr->localName === 'xmlns' ) {
					$attrName = 'xmlns';
				} else {
					$attrName = 'xmlns:' . $attr->localName;
				}
				break;
			case HTMLData::NS_XLINK:
				$attrName = 'xlink:' . $attr->localName;
				break;
			default:
				$attrPrefix = (string)$attr->prefix;
				if ( $attrPrefix !== '' ) {
					$attrName = $attrPrefix . ':' . $attr->localName;
				} else {
					$attrName = $attr->localName;
				}
			}
			if ( $this->reverseCoercion ) {
				$attrName = DOMUtils::uncoerceName( $attrName );
			}
			$encValue = strtr( $attr->value, $this->attributeEscapes );
			$s .= " $attrName=\"$encValue\"";
		}
		$s .= '>';
		$s .= $this->formatElementTail( $ns === HTMLData::NS_HTML, $name, $contents );
		return $s;
	}

	/**
	 * Get the part of an element's serialization which follows the start
	 * tag: the contents and the end tag, or nothing for a void element.
	 *
	 * @param bool $isHtml Whether the element is in the HTML namespace
	 * @param string $name The tag name as it was written in the start tag
	 * @param string|null $contents The serialized child nodes
	 * @return string
	 */
	private function formatElementTail( $isHtml, $name, $contents ) {
		if ( !$isHtml ) {
			// Foreign elements always get an end tag
			return "$contents</$name>";
		}
		if ( isset( $contents[0] ) && $contents[0] === "\n"
			&& isset( $this->prefixLfElements[$name] )
		) {
			// Duplicate the leading newline so that it survives reparsing
			return "\n$contents</$name>";
		}
		if ( isset( $this->voidElements[$name] ) ) {
			// Void elements have neither contents nor an end tag
			return '';
		}
		return "$contents</$name>";
	}
}